1 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
2 | // See https://llvm.org/LICENSE.txt for license information. |
3 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
4 | |
5 | #include "assembly.h" |
6 | |
7 | // Out-of-line LSE atomics helpers. Ported from the libgcc library. |
8 | // N = {1, 2, 4, 8} |
9 | // M = {1, 2, 4, 8, 16} |
10 | // ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'} |
11 | // Routines implemented: |
12 | // |
13 | // iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) |
14 | // iN __aarch64_swpN_ORDER(iN val, iN *ptr) |
15 | // iN __aarch64_ldaddN_ORDER(iN val, iN *ptr) |
16 | // iN __aarch64_ldclrN_ORDER(iN val, iN *ptr) |
17 | // iN __aarch64_ldeorN_ORDER(iN val, iN *ptr) |
18 | // iN __aarch64_ldsetN_ORDER(iN val, iN *ptr) |
19 | // |
20 | // Routines may modify only the temporary registers tmp0, tmp1 and tmp2, |
21 | // the return value (x0, plus x1 for the 16-byte cas) and the flags. |
22 | |
23 | #ifdef __aarch64__ |
24 | |
// Select the target architecture for the assembler. HAS_ASM_LSE is not
// defined in this file; when it is defined the LSE mnemonics are emitted
// directly, otherwise the routines below fall back to hand-encoded .inst
// words and only the base architecture is requested.
#ifndef HAS_ASM_LSE
.arch armv8-a
#else
.arch armv8-a+lse
#endif

// Declare the runtime flag that JUMP_IF_NOT_LSE reads. On Apple targets the
// symbol carries one extra leading underscore. HIDDEN is a macro defined
// outside this file (presumably in assembly.h -- confirm).
#ifdef __APPLE__
HIDDEN(___aarch64_have_lse_atomics)
#else
HIDDEN(__aarch64_have_lse_atomics)
#endif
36 | |
// Generate mnemonics for
// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4,5
//
// SIZE and MODEL are not defined in this file and are expected to be
// supplied by the build, together with exactly one of the L_<op> macros.
//
// SIZE is the operand width in bytes and selects:
//   S   - size suffix spliced into the mnemonics (b, h, or empty)
//   UXT - instruction that copies the expected value in the cas fallback;
//         uxtb/uxth zero-extend sub-word operands, a plain mov otherwise
//   B   - size field added to the hand-encoded .inst opcodes. It is left
//         undefined for SIZE == 16 because the CASP encoding does not use it.

#if SIZE == 1
#define S b
#define UXT uxtb
#define B 0x00000000
#elif SIZE == 2
#define S h
#define UXT uxth
#define B 0x40000000
#elif SIZE == 4
#define S
#define UXT mov
#define B 0x80000000
#elif SIZE == 8
#define S
#define UXT mov
#define B 0xc0000000
#elif SIZE == 16
#define S
#define UXT mov
#else
#error "SIZE must be defined as 1, 2, 4, 8 or 16"
#endif // SIZE

// Only cas has a 16-byte form. Without this check a 16-byte swp/ld<op>
// request would either fail later on the undefined B or, with HAS_ASM_LSE,
// silently assemble an 8-byte instruction under a 16-byte function name.
#if SIZE == 16 && (defined(L_swp) || defined(L_ldadd) || defined(L_ldclr) || \
                   defined(L_ldeor) || defined(L_ldset))
#error "SIZE == 16 is only supported for L_cas"
#endif
60 | |
// MODEL selects the memory ordering of the generated routine:
//   1 = relaxed, 2 = acquire, 3 = release, 4 = acquire-release, 5 = sync.
// Each branch defines:
//   SUFF    - suffix of the exported function name
//   A, L    - acquire / release letters spliced into the mnemonics
//             (empty when that ordering is not requested)
//   M       - ordering bits added to the hand-encoded CAS/CASP opcodes
//   N       - ordering bits added to the hand-encoded SWP/LD<op> opcodes
//   BARRIER - instruction emitted at the end of the load/store-exclusive
//             fallback path; empty except for the sync model

#if MODEL == 1
#define SUFF _relax
#define A
#define L
#define M 0x000000
#define N 0x000000
#define BARRIER
#elif MODEL == 2
#define SUFF _acq
#define A a
#define L
#define M 0x400000
#define N 0x800000
#define BARRIER
#elif MODEL == 3
#define SUFF _rel
#define A
#define L l
#define M 0x008000
#define N 0x400000
#define BARRIER
#elif MODEL == 4
#define SUFF _acq_rel
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#define BARRIER
#elif MODEL == 5
#define SUFF _sync
#ifdef L_swp
// swp has _acq semantics.
#define A a
#define L
#define M 0x400000
#define N 0x800000
#else
// All other _sync functions have _seq semantics: both the acquire and the
// release letter are set, exactly as for the acq_rel model.
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#endif
// The sync model additionally ends the fallback path with a full barrier.
#define BARRIER dmb ish
#else
#error "MODEL must be defined as 1, 2, 3, 4 or 5"
#endif // MODEL
108 | |
// Scratch registers, given as bare register numbers so that they can be
// wrapped by x(), w() or s() below.
#define tmp0 16
#define tmp1 17
#define tmp2 15

// Register name builders. x(n) and w(n) name the 64-bit and 32-bit views of
// register n; s(n) picks the view matching the operand width (32-bit for
// SIZE below 8, 64-bit otherwise). GLUE2/GLUE3/GLUE4 are defined outside
// this file and are assumed to paste their arguments -- confirm.
#define w(REG) GLUE2(w, REG)
#define x(REG) GLUE2(x, REG)
#if SIZE >= 8
#define s(REG) x(REG)
#else
#define s(REG) w(REG)
#endif

// Exported symbol name built from the operation, operand size and ordering
// suffix: __aarch64_ + OP + SIZE + SUFF.
#define NAME(OP) GLUE4(__aarch64_, OP, SIZE, SUFF)

// Load/store-exclusive mnemonics used by the fallback loops.
#define STXR GLUE4(st, L, xr, S)
#if MODEL != 5
#define LDXR GLUE4(ld, A, xr, S)
#else
// Drop A for _sync functions.
#define LDXR GLUE3(ld, xr, S)
#endif
131 | |
// JUMP_IF_NOT_LSE label
//
// Loads the byte-sized flag __aarch64_have_lse_atomics and branches to the
// given label when it is zero, i.e. when LSE atomics are not available.
// Falls through otherwise. Clobbers tmp0 only.
.macro JUMP_IF_NOT_LSE label
#ifdef __APPLE__
        // Mach-O relocation syntax; the symbol has an extra underscore.
        adrp    x(tmp0), ___aarch64_have_lse_atomics@page
        ldrb    w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff]
#else
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
#endif
        cbz     w(tmp0), \label
.endm
143 | |
#ifdef L_cas

// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//
// Compare-and-swap. For SIZE below 16 the operands are s(0) = expected,
// s(1) = desired and x2 = ptr. For SIZE == 16 they are x0:x1 = expected,
// x2:x3 = desired and x4 = ptr. The value read from memory is left in
// s(0) (x0:x1 for SIZE == 16).
//
// When the runtime flag reports LSE support a single CAS/CASP instruction
// is used; otherwise control reaches label 8 and a load/store-exclusive
// retry loop performs the operation.

// Instruction selection. With HAS_ASM_LSE the assembler is given the real
// mnemonic; without it the instruction is emitted as a raw encoding.
#if SIZE >= 16
#if MODEL != 5
#define LDXP GLUE3(ld, A, xp)
#else
// Drop A for _sync functions.
#define LDXP GLUE2(ld, xp)
#endif
#define STXP GLUE3(st, L, xp)
#ifndef HAS_ASM_LSE
#define CASP .inst 0x48207c82 + M
#else
#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
#endif
#else
#ifndef HAS_ASM_LSE
#define CAS .inst 0x08a07c41 + B + M
#else
#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
#endif
#endif // SIZE >= 16

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
        JUMP_IF_NOT_LSE 8f
#if SIZE >= 16
        // LSE fast path.
        CASP // x0, x1, x2, x3, [x4]
        ret
8:
        // Fallback: keep the expected pair, since x0:x1 receive the load.
        mov     x(tmp0), x0
        mov     x(tmp1), x1
1:
        LDXP    x0, x1, [x4]
        cmp     x0, x(tmp0)
        // Compare the second half only if the first half matched.
        ccmp    x1, x(tmp1), #0, eq
        bne     2f
        STXP    w(tmp2), x2, x3, [x4]
        // Non-zero status: the exclusive store failed, so retry.
        cbnz    w(tmp2), 1b
2:
        BARRIER
        ret
#else
        // LSE fast path.
        CAS // s(0), s(1), [x2]
        ret
8:
        // Fallback: keep the expected value, since s(0) receives the load.
        // UXT zero-extends sub-word operands before the compare below.
        UXT     s(tmp0), s(0)
1:
        LDXR    s(0), [x2]
        cmp     s(0), s(tmp0)
        bne     2f
        STXR    w(tmp1), s(1), [x2]
        // Non-zero status: the exclusive store failed, so retry.
        cbnz    w(tmp1), 1b
2:
        BARRIER
        ret
#endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
#endif // L_cas
198 | |
#ifdef L_swp

// iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//
// Atomic exchange: s(0) = val, x1 = ptr. The previous memory contents are
// left in s(0). Uses a single SWP when the runtime flag reports LSE
// support, otherwise the load/store-exclusive loop at label 8.

// With HAS_ASM_LSE the real mnemonic is used; without it the instruction is
// emitted as a raw encoding.
#ifndef HAS_ASM_LSE
#define SWP .inst 0x38208020 + B + N
#else
#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1]
#endif

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
        JUMP_IF_NOT_LSE 8f
        // LSE fast path.
        SWP // s(0), s(0), [x1]
        ret
8:
        // Fallback: keep the new value, since s(0) receives the load.
        mov     s(tmp0), s(0)
1:
        LDXR    s(0), [x1]
        STXR    w(tmp1), s(tmp0), [x1]
        // Non-zero status: the exclusive store failed, so retry.
        cbnz    w(tmp1), 1b
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp
219 | |
#if defined(L_ldadd) || defined(L_ldclr) || defined(L_ldeor) || defined(L_ldset)

// iN __aarch64_ld<op>N_ORDER(iN val, iN *ptr)
//
// Atomic fetch-and-<op>: s(0) = val, x1 = ptr. The previous memory contents
// are left in s(0) and memory is updated with (old <op> val).
//
// Per operation:
//   LDNM - LSE mnemonic stem, also used in the exported function name
//   OP   - ALU instruction that computes the new value in the fallback
//   OPN  - opcode field added to the hand-encoded .inst word
#if defined(L_ldadd)
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
// Clears the bits set in val: bic computes old AND NOT val.
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error
#endif

// With HAS_ASM_LSE the real mnemonic is used; without it the instruction is
// emitted as a raw encoding.
#ifndef HAS_ASM_LSE
#define LDOP .inst 0x38200020 + OPN + B + N
#else
#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
#endif

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
        JUMP_IF_NOT_LSE 8f
        // LSE fast path.
        LDOP // s(0), s(0), [x1]
        ret
8:
        // Fallback: keep the operand, since s(0) receives the load.
        mov     s(tmp0), s(0)
1:
        LDXR    s(0), [x1]
        OP      s(tmp1), s(0), s(tmp0)
        STXR    w(tmp2), s(tmp1), [x1]
        // Non-zero status: the exclusive store failed, so retry.
        cbnz    w(tmp2), 1b
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset
264 | |
// Trailer macros. Both are defined outside this file (presumably in
// assembly.h, which is included at the top -- confirm). Judging by its name,
// the first one marks the stack as non-executable; verify against its
// definition.
265 | NO_EXEC_STACK_DIRECTIVE |
266 | |
267 | // GNU property note for BTI and PAC |
268 | GNU_PROPERTY_BTI_PAC |
269 | |
270 | #endif // __aarch64__ |
271 | |