// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "assembly.h"

// Out-of-line LSE atomics helpers. Ported from libgcc library.
// N = {1, 2, 4, 8}
// M = {1, 2, 4, 8, 16}
// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'}
// Routines implemented:
//
//  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
//
// Every routine returns the value that was loaded from *ptr.
//
// Register usage, as visible in the code below:
//   cas, SIZE < 16:  x0 = expected, x1 = desired, x2 = ptr, result in x0
//   cas, SIZE == 16: x0:x1 = expected, x2:x3 = desired, x4 = ptr,
//                    result in x0:x1
//   swp / ld<op>:    x0 = val, x1 = ptr, result in x0
//
// Routines may modify temporary registers tmp0, tmp1, tmp2,
// the return value (x0, plus x1 for the 16-byte cas) and the flags only.
//
// This file is a template: the build is expected to define SIZE, MODEL and
// one of L_cas, L_swp, L_ldadd, L_ldclr, L_ldeor, L_ldset per compilation.

#if defined(__aarch64__) || defined(__arm64ec__)

// When the assembler understands LSE mnemonics they are used directly;
// otherwise the LSE instructions are emitted as raw .inst words.
#ifdef HAS_ASM_LSE
        .arch armv8-a+lse
#else
        .arch armv8-a
#endif

// Flag byte tested by JUMP_IF_NOT_LSE. The Apple spelling carries one
// extra leading underscore.
#if !defined(__APPLE__)
HIDDEN(__aarch64_have_lse_atomics)
#else
HIDDEN(___aarch64_have_lse_atomics)
#endif

// Generate mnemonics for
// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4,5

// Size-dependent pieces:
//   S   - mnemonic size suffix (b, h, or empty)
//   UXT - instruction used to copy the expected value for the cas fallback;
//         zero-extending for the 1- and 2-byte sizes, plain mov otherwise
//   B   - size bits added into the hand-encoded .inst words. B is not
//         defined for SIZE == 16; the 16-byte CASP encoding does not use it.
#if SIZE == 1
#define S b
#define UXT uxtb
#define B 0x00000000
#elif SIZE == 2
#define S h
#define UXT uxth
#define B 0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
#define S
#define UXT mov
#if SIZE == 4
#define B 0x80000000
#elif SIZE == 8
#define B 0xc0000000
#endif
#else
#error
#endif // SIZE

// Memory-model-dependent pieces:
//   SUFF    - function name suffix
//   A, L    - acquire / release letters spliced into mnemonics
//   M       - ordering bits added into the CAS / CASP .inst encodings
//             (0x400000 accompanies A, 0x008000 accompanies L)
//   N       - ordering bits added into the SWP / LD<op> .inst encodings
//             (0x800000 accompanies A, 0x400000 accompanies L)
//   BARRIER - instruction emitted at the end of the non-LSE fallback path;
//             empty except for MODEL 5
#if MODEL == 1
#define SUFF _relax
#define A
#define L
#define M 0x000000
#define N 0x000000
#define BARRIER
#elif MODEL == 2
#define SUFF _acq
#define A a
#define L
#define M 0x400000
#define N 0x800000
#define BARRIER
#elif MODEL == 3
#define SUFF _rel
#define A
#define L l
#define M 0x008000
#define N 0x400000
#define BARRIER
#elif MODEL == 4
#define SUFF _acq_rel
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#define BARRIER
#elif MODEL == 5
#define SUFF _sync
#ifdef L_swp
// swp has _acq semantics.
#define A a
#define L
#define M 0x400000
#define N 0x800000
#else
// All other _sync functions have _seq semantics.
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#endif
#define BARRIER dmb ish
#else
#error
#endif // MODEL

// Define register size.
// x(n) / w(n) name the 64-bit / 32-bit view of register n; s(n) picks the
// view that matches the operand SIZE (w below 8 bytes, x otherwise).
#define x(N) GLUE2(x, N)
#define w(N) GLUE2(w, N)
#if SIZE < 8
#define s(N) w(N)
#else
#define s(N) x(N)
#endif

// NAME(BASE) builds the exported symbol, e.g. __aarch64_ + cas + 4 + _acq.
#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
// Load-exclusive / store-exclusive mnemonics for the fallback loops.
#if MODEL == 5
// Drop A for _sync functions.
// (The fallback path of MODEL 5 ends with BARRIER defined as dmb ish.)
#define LDXR GLUE3(ld, xr, S)
#else
#define LDXR GLUE4(ld, A, xr, S)
#endif
#define STXR GLUE4(st, L, xr, S)

// Define temporary registers.
// These are register numbers, used through x(), w() and s():
// x16, x17 and x15.
#define tmp0 16
#define tmp1 17
#define tmp2 15

// Macro for branch to label if no LSE available
// Loads the one-byte flag __aarch64_have_lse_atomics and branches to
// \label when it is zero. Clobbers tmp0 only.
.macro JUMP_IF_NOT_LSE label
#if !defined(__APPLE__)
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
#else
        adrp    x(tmp0), ___aarch64_have_lse_atomics@page
        ldrb    w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff]
#endif
        cbz     w(tmp0), \label
.endm

#ifdef L_cas
// Compare-and-swap. Local labels used in both size variants:
//   8: entry of the load/store-exclusive fallback
//   0: retry point of the exclusive loop
//   1: common exit (also taken on compare mismatch, without storing)
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
        JUMP_IF_NOT_LSE 8f
#if SIZE < 16
#ifdef HAS_ASM_LSE
#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
#else
#define CAS .inst 0x08a07c41 + B + M
#endif
        // LSE path: a single CAS instruction; the result is left in x0.
        CAS // s(0), s(1), [x2]
        ret
8:
        // Keep the expected value in tmp0 because x0 receives the loaded
        // value. UXT zero-extends it for the 1- and 2-byte sizes so that it
        // compares equal to a zero-extended narrow load.
        UXT     s(tmp0), s(0)
0:
        LDXR    s(0), [x2]
        cmp     s(0), s(tmp0)
        bne     1f
        // tmp1 receives the store-exclusive status; nonzero means retry.
        STXR    w(tmp1), s(1), [x2]
        cbnz    w(tmp1), 0b
1:
        BARRIER
        ret
#else
#if MODEL == 5
// Drop A for _sync functions.
#define LDXP GLUE2(ld, xp)
#else
#define LDXP GLUE3(ld, A, xp)
#endif
#define STXP GLUE3(st, L, xp)
#ifdef HAS_ASM_LSE
#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
#else
#define CASP .inst 0x48207c82 + M
#endif

        // LSE path: a single CASP on the register pairs x0:x1 and x2:x3.
        CASP // x0, x1, x2, x3, [x4]
        ret
8:
        // Keep the expected pair in tmp0:tmp1 because x0:x1 receive the
        // loaded value.
        mov     x(tmp0), x0
        mov     x(tmp1), x1
0:
        LDXP    x0, x1, [x4]
        cmp     x0, x(tmp0)
        // Compare the second half only when the first half matched;
        // otherwise force the flags to "not equal" (nzcv = 0).
        ccmp    x1, x(tmp1), #0, eq
        bne     1f
        // tmp2 receives the store-exclusive status; nonzero means retry.
        STXP    w(tmp2), x2, x3, [x4]
        cbnz    w(tmp2), 0b
1:
        BARRIER
        ret
#endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
#endif // L_cas

#ifdef L_swp
#ifdef HAS_ASM_LSE
#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1]
#else
#define SWP .inst 0x38208020 + B + N
#endif
// Atomic exchange. Local labels:
//   8: entry of the load/store-exclusive fallback
//   0: retry point of the exclusive loop
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
        JUMP_IF_NOT_LSE 8f
        // LSE path: a single SWP instruction; the result is left in x0.
        SWP // s(0), s(0), [x1]
        ret
8:
        // Keep the new value in tmp0 because x0 receives the loaded value.
        mov     s(tmp0), s(0)
0:
        LDXR    s(0), [x1]
        // tmp1 receives the store-exclusive status; nonzero means retry.
        STXR    w(tmp1), s(tmp0), [x1]
        cbnz    w(tmp1), 0b
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp

#if defined(L_ldadd) || defined(L_ldclr) || \
    defined(L_ldeor) || defined(L_ldset)

// Per-operation pieces:
//   LDNM - LSE mnemonic stem and function base name
//   OP   - plain ALU instruction used by the fallback loop
//   OPN  - operation bits added into the hand-encoded .inst word
#ifdef L_ldadd
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error
#endif

#ifdef HAS_ASM_LSE
#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
#else
#define LDOP .inst 0x38200020 + OPN + B + N
#endif

// Atomic fetch-and-<op>. Local labels:
//   8: entry of the load/store-exclusive fallback
//   0: retry point of the exclusive loop
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
        JUMP_IF_NOT_LSE 8f
        // LSE path: a single LD<op> instruction; the result is left in x0.
        LDOP // s(0), s(0), [x1]
        ret
8:
        // Keep the operand in tmp0 because x0 receives the loaded value.
        mov     s(tmp0), s(0)
0:
        LDXR    s(0), [x1]
        // tmp1 = loaded value OP operand; x0 keeps the loaded value.
        OP      s(tmp1), s(0), s(tmp0)
        // tmp2 receives the store-exclusive status; nonzero means retry.
        STXR    w(tmp2), s(tmp1), [x1]
        cbnz    w(tmp2), 0b
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset

NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC

#endif // defined(__aarch64__) || defined(__arm64ec__)