//===-- xray_trampoline_x86.S -----------------------------------*- ASM -*-===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is a part of XRay, a dynamic runtime instrumentation system. |
10 | // |
11 | // This implements the X86-specific assembler for the trampolines. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "../builtins/assembly.h" |
16 | #include "../sanitizer_common/sanitizer_asm.h" |
17 | |
18 | // XRay trampolines which are not produced by intrinsics are not System V AMD64 |
19 | // ABI compliant because they are called with a stack that is always misaligned |
// by 8 bytes with respect to a 16-byte alignment. This is because they are
21 | // called immediately after the call to, or immediately before returning from, |
22 | // the function being instrumented. This saves space in the patch point, but |
23 | // misaligns the stack by 8 bytes. |
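//
// For reference, a patched entry sled looks roughly like this (see
// xray_x86_64.cpp and the XRay documentation; the exact encoding may differ):
//
//   movl $<function id>, %r10d
//   callq __xray_FunctionEntry
//
// which is why the trampolines below re-align the stack themselves before
// calling into any handler.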
24 | |
25 | .macro ALIGN_STACK_16B |
26 | #if defined(__APPLE__) |
27 | subq $$8, %rsp |
28 | #else |
29 | subq $8, %rsp |
30 | #endif |
31 | CFI_ADJUST_CFA_OFFSET(8) |
32 | .endm |
33 | |
34 | .macro RESTORE_STACK_ALIGNMENT |
35 | #if defined(__APPLE__) |
36 | addq $$8, %rsp |
37 | #else |
38 | addq $8, %rsp |
39 | #endif |
40 | CFI_ADJUST_CFA_OFFSET(-8) |
41 | .endm |
42 | |
43 | // This macro should keep the stack aligned to 16 bytes. |
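// The frame it builds is 248 bytes: 8 for the saved flags (pushfq) plus a
// 240-byte spill area holding %rbp, %xmm0-%xmm7, and the general-purpose
// registers at the offsets used below. In the trampolines that also use
// ALIGN_STACK_16B, the combined adjustment is 8 + 248 = 256 bytes, a
// multiple of 16.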
44 | .macro SAVE_REGISTERS |
45 | pushfq |
46 | CFI_ADJUST_CFA_OFFSET(8) |
47 | subq $240, %rsp |
48 | CFI_ADJUST_CFA_OFFSET(240) |
49 | movq %rbp, 232(%rsp) |
50 | movupd %xmm0, 216(%rsp) |
51 | movupd %xmm1, 200(%rsp) |
52 | movupd %xmm2, 184(%rsp) |
53 | movupd %xmm3, 168(%rsp) |
54 | movupd %xmm4, 152(%rsp) |
55 | movupd %xmm5, 136(%rsp) |
56 | movupd %xmm6, 120(%rsp) |
57 | movupd %xmm7, 104(%rsp) |
58 | movq %rdi, 96(%rsp) |
59 | movq %rax, 88(%rsp) |
60 | movq %rdx, 80(%rsp) |
61 | movq %rsi, 72(%rsp) |
62 | movq %rcx, 64(%rsp) |
63 | movq %r8, 56(%rsp) |
64 | movq %r9, 48(%rsp) |
65 | movq %r10, 40(%rsp) |
66 | movq %r11, 32(%rsp) |
67 | movq %r12, 24(%rsp) |
68 | movq %r13, 16(%rsp) |
69 | movq %r14, 8(%rsp) |
70 | movq %r15, 0(%rsp) |
71 | .endm |
72 | |
73 | // This macro should keep the stack aligned to 16 bytes. |
74 | .macro RESTORE_REGISTERS |
75 | movq 232(%rsp), %rbp |
76 | movupd 216(%rsp), %xmm0 |
77 | movupd 200(%rsp), %xmm1 |
78 | movupd 184(%rsp), %xmm2 |
79 | movupd 168(%rsp), %xmm3 |
80 | movupd 152(%rsp), %xmm4 |
81 | movupd 136(%rsp), %xmm5 |
movupd 120(%rsp), %xmm6
movupd 104(%rsp), %xmm7
84 | movq 96(%rsp), %rdi |
85 | movq 88(%rsp), %rax |
86 | movq 80(%rsp), %rdx |
87 | movq 72(%rsp), %rsi |
88 | movq 64(%rsp), %rcx |
89 | movq 56(%rsp), %r8 |
90 | movq 48(%rsp), %r9 |
91 | movq 40(%rsp), %r10 |
92 | movq 32(%rsp), %r11 |
93 | movq 24(%rsp), %r12 |
94 | movq 16(%rsp), %r13 |
95 | movq 8(%rsp), %r14 |
96 | movq 0(%rsp), %r15 |
97 | addq $240, %rsp |
98 | CFI_ADJUST_CFA_OFFSET(-240) |
99 | popfq |
100 | CFI_ADJUST_CFA_OFFSET(-8) |
101 | .endm |
102 | |
103 | .text |
104 | #if !defined(__APPLE__) |
105 | .section .text |
106 | .file "xray_trampoline_x86.S" |
107 | #else |
108 | .section __TEXT,__text |
109 | #endif |
110 | |
111 | //===----------------------------------------------------------------------===// |
112 | |
113 | .globl ASM_SYMBOL(__xray_FunctionEntry) |
114 | ASM_HIDDEN(__xray_FunctionEntry) |
115 | .align 16, 0x90 |
116 | ASM_TYPE_FUNCTION(__xray_FunctionEntry) |
117 | # LLVM-MCA-BEGIN __xray_FunctionEntry |
118 | ASM_SYMBOL(__xray_FunctionEntry): |
119 | CFI_STARTPROC |
120 | ALIGN_STACK_16B |
121 | SAVE_REGISTERS |
122 | |
123 | // This load has to be atomic, it's concurrent with __xray_patch(). |
124 | // On x86/amd64, a simple (type-aligned) MOV instruction is enough. |
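// The pointer loaded here is the handler installed via __xray_set_handler()
// (declared in xray_interface.h), taking roughly
// (int32_t FuncId, XRayEntryType Type); it is invoked below with those two
// arguments in %edi and %esi.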
125 | movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax |
126 | testq %rax, %rax |
127 | je LOCAL_LABEL(tmp0) |
128 | |
129 | // The patched function prologue puts its xray_instr_map index into %r10d. |
// Function id in %edi, XRayEntryType::ENTRY (0) in %esi.
movl %r10d, %edi
xorl %esi, %esi
132 | callq *%rax |
133 | |
134 | LOCAL_LABEL(tmp0): |
135 | RESTORE_REGISTERS |
136 | RESTORE_STACK_ALIGNMENT |
137 | retq |
138 | # LLVM-MCA-END |
139 | ASM_SIZE(__xray_FunctionEntry) |
140 | CFI_ENDPROC |
141 | |
142 | //===----------------------------------------------------------------------===// |
143 | |
144 | .globl ASM_SYMBOL(__xray_FunctionExit) |
145 | ASM_HIDDEN(__xray_FunctionExit) |
146 | .align 16, 0x90 |
147 | ASM_TYPE_FUNCTION(__xray_FunctionExit) |
148 | # LLVM-MCA-BEGIN __xray_FunctionExit |
149 | ASM_SYMBOL(__xray_FunctionExit): |
150 | CFI_STARTPROC |
151 | ALIGN_STACK_16B |
152 | |
// Save the important registers first. Since we assume this function is only
// jumped into, we only preserve the registers needed for returning.
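// That means %rbp plus the System V return-value registers %rax, %rdx,
// %xmm0, and %xmm1; callee-saved registers are already preserved by the
// handler itself, since it is an ABI-conforming function.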
156 | subq $64, %rsp |
157 | CFI_ADJUST_CFA_OFFSET(64) |
158 | movq %rbp, 48(%rsp) |
159 | movupd %xmm0, 32(%rsp) |
160 | movupd %xmm1, 16(%rsp) |
161 | movq %rax, 8(%rsp) |
162 | movq %rdx, 0(%rsp) |
163 | movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax |
testq %rax, %rax
165 | je LOCAL_LABEL(tmp2) |
166 | |
// Function id in %edi, XRayEntryType::EXIT (1) in %esi.
movl %r10d, %edi
movl $1, %esi
169 | callq *%rax |
170 | |
171 | LOCAL_LABEL(tmp2): |
172 | // Restore the important registers. |
173 | movq 48(%rsp), %rbp |
174 | movupd 32(%rsp), %xmm0 |
175 | movupd 16(%rsp), %xmm1 |
176 | movq 8(%rsp), %rax |
177 | movq 0(%rsp), %rdx |
178 | addq $64, %rsp |
179 | CFI_ADJUST_CFA_OFFSET(-64) |
180 | |
181 | RESTORE_STACK_ALIGNMENT |
182 | retq |
183 | # LLVM-MCA-END |
184 | ASM_SIZE(__xray_FunctionExit) |
185 | CFI_ENDPROC |
186 | |
187 | //===----------------------------------------------------------------------===// |
188 | |
189 | .globl ASM_SYMBOL(__xray_FunctionTailExit) |
190 | ASM_HIDDEN(__xray_FunctionTailExit) |
191 | .align 16, 0x90 |
192 | ASM_TYPE_FUNCTION(__xray_FunctionTailExit) |
193 | # LLVM-MCA-BEGIN __xray_FunctionTailExit |
194 | ASM_SYMBOL(__xray_FunctionTailExit): |
195 | CFI_STARTPROC |
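// A tail-exit sled fires just before the instrumented function tail-calls
// another function, so the outgoing call's argument registers are generally
// still live; save the full register set rather than only the return
// registers.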
196 | ALIGN_STACK_16B |
197 | SAVE_REGISTERS |
198 | |
199 | movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax |
testq %rax, %rax
201 | je LOCAL_LABEL(tmp4) |
202 | |
// Function id in %edi, XRayEntryType::TAIL (2) in %esi.
movl %r10d, %edi
movl $2, %esi
205 | callq *%rax |
206 | |
207 | LOCAL_LABEL(tmp4): |
208 | RESTORE_REGISTERS |
209 | RESTORE_STACK_ALIGNMENT |
210 | retq |
211 | # LLVM-MCA-END |
212 | ASM_SIZE(__xray_FunctionTailExit) |
213 | CFI_ENDPROC |
214 | |
215 | //===----------------------------------------------------------------------===// |
216 | |
217 | .globl ASM_SYMBOL(__xray_ArgLoggerEntry) |
218 | ASM_HIDDEN(__xray_ArgLoggerEntry) |
219 | .align 16, 0x90 |
220 | ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry) |
221 | # LLVM-MCA-BEGIN __xray_ArgLoggerEntry |
222 | ASM_SYMBOL(__xray_ArgLoggerEntry): |
223 | CFI_STARTPROC |
224 | ALIGN_STACK_16B |
225 | SAVE_REGISTERS |
226 | |
227 | // Again, these function pointer loads must be atomic; MOV is fine. |
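// %rax ends up holding either the argument logger installed via
// __xray_set_handler_arg1(), taking roughly
// (int32_t FuncId, XRayEntryType Type, uint64_t Arg1), or, failing that, the
// plain XRayPatchedFunction handler; either way it is called below with
// LOG_ARGS_ENTRY as the entry type.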
228 | movq ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax |
229 | testq %rax, %rax |
230 | jne LOCAL_LABEL(arg1entryLog) |
231 | |
// If the arg1 logging handler is not set, defer to the no-arg logging handler.
233 | movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax |
234 | testq %rax, %rax |
235 | je LOCAL_LABEL(arg1entryFail) |
236 | |
237 | LOCAL_LABEL(arg1entryLog): |
238 | |
// The first argument (the instrumented function's %rdi) becomes the third.
movq %rdi, %rdx

// XRayEntryType::LOG_ARGS_ENTRY (3) becomes the second.
movl $0x3, %esi

// The 32-bit function ID becomes the first.
movl %r10d, %edi
247 | |
248 | callq *%rax |
249 | |
250 | LOCAL_LABEL(arg1entryFail): |
251 | RESTORE_REGISTERS |
252 | RESTORE_STACK_ALIGNMENT |
253 | retq |
254 | # LLVM-MCA-END |
255 | ASM_SIZE(__xray_ArgLoggerEntry) |
256 | CFI_ENDPROC |
257 | |
258 | //===----------------------------------------------------------------------===// |
259 | |
260 | // __xray_*Event have default visibility so that they can be referenced by user |
261 | // DSOs that do not link against the runtime. |
262 | .global ASM_SYMBOL(__xray_CustomEvent) |
263 | .align 16, 0x90 |
264 | ASM_TYPE_FUNCTION(__xray_CustomEvent) |
265 | # LLVM-MCA-BEGIN __xray_CustomEvent |
266 | ASM_SYMBOL(__xray_CustomEvent): |
267 | CFI_STARTPROC |
268 | SAVE_REGISTERS |
269 | |
// This trampoline receives two arguments, which should already be in
// %rdi and %rsi.
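// They come from the __xray_customevent(buffer, size) builtin; the handler
// installed via __xray_set_customevent_handler() takes roughly
// (void *Buffer, size_t Size), so no re-shuffling is needed.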
272 | movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax |
testq %rax, %rax
274 | je LOCAL_LABEL(customEventCleanup) |
275 | |
276 | callq *%rax |
277 | |
278 | LOCAL_LABEL(customEventCleanup): |
279 | RESTORE_REGISTERS |
280 | retq |
281 | # LLVM-MCA-END |
282 | ASM_SIZE(__xray_CustomEvent) |
283 | CFI_ENDPROC |
284 | |
285 | //===----------------------------------------------------------------------===// |
286 | |
287 | .global ASM_SYMBOL(__xray_TypedEvent) |
288 | .align 16, 0x90 |
289 | ASM_TYPE_FUNCTION(__xray_TypedEvent) |
290 | # LLVM-MCA-BEGIN __xray_TypedEvent |
291 | ASM_SYMBOL(__xray_TypedEvent): |
292 | CFI_STARTPROC |
293 | SAVE_REGISTERS |
294 | |
// This trampoline receives three arguments, which should already be in
// %rdi, %rsi, and %rdx.
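// They come from the __xray_typedevent() builtin; the handler installed via
// __xray_set_typedevent_handler() takes roughly (event type, payload
// pointer, payload size), matching %rdi, %rsi, and %rdx, so no re-shuffling
// is needed.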
297 | movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax |
testq %rax, %rax
299 | je LOCAL_LABEL(typedEventCleanup) |
300 | |
301 | callq *%rax |
302 | |
303 | LOCAL_LABEL(typedEventCleanup): |
304 | RESTORE_REGISTERS |
305 | retq |
306 | # LLVM-MCA-END |
307 | ASM_SIZE(__xray_TypedEvent) |
308 | CFI_ENDPROC |
309 | |
310 | //===----------------------------------------------------------------------===// |
311 | |
312 | NO_EXEC_STACK_DIRECTIVE |
313 | |