// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "assembly.h"

// Out-of-line LSE atomics helpers. Ported from the libgcc library.
//
// SIZE, MODEL and the L_<op> selector are not defined in this file; they are
// expected to be supplied by the build (one routine per compilation).
//
// N = {1, 2, 4, 8}
// M = {1, 2, 4, 8, 16}
// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'}
// Routines implemented:
//
//  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
//
// Register usage, as seen in the instruction operands below:
//  cas, M <= 8 : expected in x0/w0, desired in x1/w1, ptr in x2
//  cas, M == 16: expected in x0:x1, desired in x2:x3, ptr in x4
//  all others  : val in x0/w0, ptr in x1
// The value previously held in memory is returned in x0/w0 (x0:x1 for the
// 16-byte cas).
//
// Routines may modify temporary registers tmp0, tmp1, tmp2, the return
// value (x0, plus x1 for the 16-byte cas) and the flags only.

#ifdef __aarch64__

// Enable the LSE mnemonics only when the assembler understands them; without
// HAS_ASM_LSE the LSE instructions are emitted as raw .inst words instead.
#ifdef HAS_ASM_LSE
.arch armv8-a+lse
#else
.arch armv8-a
#endif

// Run-time feature flag tested by JUMP_IF_NOT_LSE. The Apple spelling carries
// an extra leading underscore. HIDDEN comes from assembly.h.
#if !defined(__APPLE__)
HIDDEN(__aarch64_have_lse_atomics)
#else
HIDDEN(___aarch64_have_lse_atomics)
#endif
// Generate mnemonics for
// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4,5
//
// Per-SIZE definitions:
//  S   - size suffix glued onto generated mnemonics (b, h or empty).
//  UXT - instruction used by the cas fallback to copy the expected value
//        into a temporary (zero-extending for the sub-word sizes).
//  B   - size bits added to the hand-encoded .inst words. Left undefined
//        for SIZE == 16, whose only routine (casp) does not use it.

#if SIZE == 1
#define S b
#define UXT uxtb
#define B 0x00000000
#elif SIZE == 2
#define S h
#define UXT uxth
#define B 0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
#define S
#define UXT mov
#if SIZE == 4
#define B 0x80000000
#elif SIZE == 8
#define B 0xc0000000
#endif
#else
#error "SIZE must be one of 1, 2, 4, 8 or 16"
#endif // SIZE

// Only cas has a 16-byte form; the other routines would silently operate on
// 8 bytes (or reference the undefined B), so reject the combination early.
#if SIZE == 16 && !defined(L_cas)
#error "SIZE == 16 is only supported together with L_cas"
#endif
// Per-MODEL definitions:
//  SUFF    - suffix of the exported function name.
//  A, L    - acquire / release letters glued into the generated mnemonics.
//  M       - ordering bits added to the hand-encoded CAS/CASP .inst words.
//  N       - ordering bits added to the hand-encoded SWP/LD<op> .inst words.
//  BARRIER - instruction placed at the end of the LL/SC fallback path
//            (empty except for the _sync model).
#if MODEL == 1
#define SUFF _relax
#define A
#define L
#define M 0x000000
#define N 0x000000
#define BARRIER
#elif MODEL == 2
#define SUFF _acq
#define A a
#define L
#define M 0x400000
#define N 0x800000
#define BARRIER
#elif MODEL == 3
#define SUFF _rel
#define A
#define L l
#define M 0x008000
#define N 0x400000
#define BARRIER
#elif MODEL == 4
#define SUFF _acq_rel
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#define BARRIER
#elif MODEL == 5
#define SUFF _sync
#ifdef L_swp
// swp has _acq semantics.
#define A a
#define L
#define M 0x400000
#define N 0x800000
#else
// All other _sync functions have _seq semantics.
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#endif
#define BARRIER dmb ish
#else
#error "MODEL must be one of 1, 2, 3, 4 or 5"
#endif // MODEL
// Define register size.
// x(n) / w(n) build the 64-bit / 32-bit name of register n; s(n) picks the
// view that matches SIZE (w for sub-8-byte operands, x otherwise).
#define x(REGNO) GLUE2(x, REGNO)
#define w(REGNO) GLUE2(w, REGNO)
#if SIZE < 8
#define s(REGNO) w(REGNO)
#else
#define s(REGNO) x(REGNO)
#endif

// Exported symbol name: __aarch64_<BASE><SIZE><SUFF>.
#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)

// Exclusive load/store mnemonics used by the fallback loops.
#if MODEL == 5
// Drop A for _sync functions.
#define LDXR GLUE3(ld, xr, S)
#else
#define LDXR GLUE4(ld, A, xr, S)
#endif
#define STXR GLUE4(st, L, xr, S)

// Define temporary registers.
#define tmp0 16
#define tmp1 17
#define tmp2 15
// Macro for branch to label if no LSE available.
// Loads the byte-sized flag __aarch64_have_lse_atomics and branches to
// \label when it is zero. Overwrites x(tmp0); uses no flag-setting
// instruction.
.macro JUMP_IF_NOT_LSE label
#if !defined(__APPLE__)
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
#else
        adrp    x(tmp0), ___aarch64_have_lse_atomics@page
        ldrb    w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff]
#endif
        cbz     w(tmp0), \label
.endm
#ifdef L_cas
// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//
// Compare-and-swap. Takes the single LSE instruction when the run-time flag
// is set, otherwise falls back to a load-exclusive/store-exclusive loop at
// local label 8. Both paths leave the value read from memory in the return
// register(s).
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
        JUMP_IF_NOT_LSE 8f
#if SIZE < 16
// In:  s(0) = expected, s(1) = desired, x2 = ptr
// Out: s(0) = value read from [x2]
#ifdef HAS_ASM_LSE
#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
#else
#define CAS .inst 0x08a07c41 + B + M
#endif
        CAS                             // s(0), s(1), [x2]
        ret
8:
        UXT     s(tmp0), s(0)           // tmp0 = expected
0:
        LDXR    s(0), [x2]              // s(0) = current value
        cmp     s(0), s(tmp0)
        bne     1f                      // mismatch: return without storing
        STXR    w(tmp1), s(1), [x2]     // try to store desired
        cbnz    w(tmp1), 0b             // exclusive store failed: retry
1:
        BARRIER
        ret
#else
// In:  x0:x1 = expected, x2:x3 = desired, x4 = ptr
// Out: x0:x1 = value read from [x4]
#if MODEL == 5
// Drop A for _sync functions.
#define LDXP GLUE2(ld, xp)
#else
#define LDXP GLUE3(ld, A, xp)
#endif
#define STXP GLUE3(st, L, xp)
#ifdef HAS_ASM_LSE
#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
#else
#define CASP .inst 0x48207c82 + M
#endif

        CASP                            // x0, x1, x2, x3, [x4]
        ret
8:
        mov     x(tmp0), x0             // tmp0:tmp1 = expected
        mov     x(tmp1), x1
0:
        LDXP    x0, x1, [x4]            // x0:x1 = current value
        cmp     x0, x(tmp0)
        ccmp    x1, x(tmp1), #0, eq     // compare second half only if first matched
        bne     1f                      // mismatch: return without storing
        STXP    w(tmp2), x2, x3, [x4]   // try to store desired
        cbnz    w(tmp2), 0b             // exclusive store failed: retry
1:
        BARRIER
        ret
#endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
#endif // L_cas
#ifdef L_swp
// iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//
// Atomic exchange.
// In:  s(0) = val, x1 = ptr
// Out: s(0) = value read from [x1]
#ifdef HAS_ASM_LSE
#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1]
#else
#define SWP .inst 0x38208020 + B + N
#endif
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
        JUMP_IF_NOT_LSE 8f
        SWP                             // s(0), s(0), [x1]
        ret
8:
        mov     s(tmp0), s(0)           // tmp0 = val
0:
        LDXR    s(0), [x1]              // s(0) = current value
        STXR    w(tmp1), s(tmp0), [x1]  // try to store val
        cbnz    w(tmp1), 0b             // exclusive store failed: retry
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp
#if defined(L_ldadd) || defined(L_ldclr) || \
    defined(L_ldeor) || defined(L_ldset)

// iN __aarch64_ld<op>N_ORDER(iN val, iN *ptr)
//
// Atomic fetch-and-<op>.
//  LDNM - LSE mnemonic stem, also used in the exported name.
//  OP   - ALU instruction used by the fallback loop to combine the loaded
//         value with val.
//  OPN  - operation bits added to the hand-encoded .inst word.
#ifdef L_ldadd
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error "expected one of L_ldadd, L_ldclr, L_ldeor or L_ldset"
#endif

#ifdef HAS_ASM_LSE
#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
#else
#define LDOP .inst 0x38200020 + OPN + B + N
#endif

// In:  s(0) = val, x1 = ptr
// Out: s(0) = value read from [x1] before the update
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
        JUMP_IF_NOT_LSE 8f
        LDOP                            // s(0), s(0), [x1]
        ret
8:
        mov     s(tmp0), s(0)           // tmp0 = val
0:
        LDXR    s(0), [x1]              // s(0) = current value
        OP      s(tmp1), s(0), s(tmp0)  // tmp1 = current <op> val
        STXR    w(tmp2), s(tmp1), [x1]  // try to store the result
        cbnz    w(tmp2), 0b             // exclusive store failed: retry
        BARRIER
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset
// Both directives below are macros provided by assembly.h.
NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC

#endif // __aarch64__

// Source: compiler-rt/lib/builtins/aarch64/lse.S