1 | // Copyright 2017 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
11 | //! This module defines the `__rust_probestack` intrinsic which is used in the |
12 | //! implementation of "stack probes" on certain platforms. |
13 | //! |
14 | //! The purpose of a stack probe is to provide a static guarantee that if a |
15 | //! thread has a guard page then a stack overflow is guaranteed to hit that |
16 | //! guard page. If a function did not have a stack probe then there's a risk of |
17 | //! having a stack frame *larger* than the guard page, so a function call could |
18 | //! skip over the guard page entirely and then later hit maybe the heap or |
19 | //! another thread, possibly leading to security vulnerabilities such as [The |
20 | //! Stack Clash], for example. |
21 | //! |
22 | //! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash |
23 | //! |
//! The `__rust_probestack` function is called in the prologue of functions
//! whose stack size is larger than the guard page, for example larger than
//! 4096 bytes on x86. This function is then responsible for "touching" all
//! pages relevant to the stack to ensure that if any of them is the guard page
//! we are guaranteed to hit it.
29 | //! |
30 | //! The precise ABI for how this function operates is defined by LLVM. There's |
31 | //! no real documentation as to what this is, so you'd basically need to read |
32 | //! the LLVM source code for reference. Often though the test cases can be |
33 | //! illuminating as to the ABI that's generated, or just looking at the output |
34 | //! of `llc`. |
35 | //! |
//! Note that the probes in this module are written with `global_asm!` rather
//! than as ordinary Rust functions because the ABI corresponds to no actual
//! ABI.
38 | //! |
//! Finally it's worth noting that at the time of this writing LLVM only has
//! support for stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture like ARM or PowerPC64. I'm sure LLVM would
//! more than welcome such a change!
43 | |
44 | #![cfg (not(feature = "mangled-names" ))] |
45 | // Windows already has builtins to do this. |
46 | #![cfg (not(windows))] |
47 | // All these builtins require assembly |
48 | #![cfg (not(feature = "no-asm" ))] |
49 | // We only define stack probing for these architectures today. |
50 | #![cfg (any(target_arch = "x86_64" , target_arch = "x86" ))] |
51 | |
// Declaration of the probe's entry point. No Rust body is attached to it; the
// symbol itself is emitted by the `global_asm!` blocks further down in this
// file. The empty parameter list does not describe the real calling
// convention: the assembly implementations receive the requested frame size
// in `%rax` (x86_64) or `%eax` (x86).
extern "C" {
    pub fn __rust_probestack();
}
55 | |
56 | // A wrapper for our implementation of __rust_probestack, which allows us to |
57 | // keep the assembly inline while controlling all CFI directives in the assembly |
58 | // emitted for the function. |
59 | // |
60 | // This is the ELF version. |
61 | #[cfg (not(any(target_vendor = "apple" , target_os = "uefi" )))] |
// Expands to one assembly string: the ELF prologue that opens a dedicated
// `.text.__rust_probestack` section and declares `__rust_probestack` as a
// global, hidden function symbol, then the caller-supplied instructions
// (`$asm`), then the epilogue that records the symbol size and closes the
// section again.
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".pushsection .text.__rust_probestack\n",
            ".globl __rust_probestack\n",
            ".type __rust_probestack, @function\n",
            ".hidden __rust_probestack\n",
            "__rust_probestack:\n",
            $asm,
            "\n",
            ".size __rust_probestack, . - __rust_probestack\n",
            ".popsection\n",
        )
    };
}
80 | |
81 | #[cfg (all(target_os = "uefi" , target_arch = "x86_64" ))] |
// UEFI x86_64 flavour of the wrapper: only exports the `__rust_probestack`
// label (no section, type, visibility or size directives) and places the
// caller-supplied instructions (`$asm`) directly after it.
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".globl __rust_probestack\n",
            "__rust_probestack:\n",
            $asm,
        )
    };
}
93 | |
94 | // Same as above, but for Mach-O. Note that the triple underscore |
95 | // is deliberate |
96 | #[cfg (target_vendor = "apple" )] |
// Mach-O flavour of the wrapper: exports the label with three leading
// underscores (`___rust_probestack`) and places the caller-supplied
// instructions (`$asm`) directly after it.
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".globl ___rust_probestack\n",
            "___rust_probestack:\n",
            $asm,
        )
    };
}
108 | |
109 | // In UEFI x86 arch, triple underscore is deliberate. |
110 | #[cfg (all(target_os = "uefi" , target_arch = "x86" ))] |
// UEFI x86 (32-bit) flavour of the wrapper: exports the label with three
// leading underscores (`___rust_probestack`) and places the caller-supplied
// instructions (`$asm`) directly after it.
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".globl ___rust_probestack\n",
            "___rust_probestack:\n",
            $asm,
        )
    };
}
122 | |
// Stack probe for x86_64 on every target except Fortanix SGX.
//
// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
// ensuring that if any pages are unmapped we'll make a page fault.
//
// The ABI here is that the stack frame size is located in `%rax`. Upon
// return we're not supposed to modify `%rsp` or `%rax`.
//
// Walkthrough of the assembly below:
//   * `%rbp` is pushed and set up as a frame pointer; the `.cfi_*` directives
//     track that so the frame stays unwindable.
//   * `%r11` receives a copy of `%rax` and serves as the count of bytes still
//     to probe. It is not restored before returning.
//   * While more than 0x1000 bytes remain, `%rsp` is lowered by 0x1000 and
//     `test %rsp,8(%rsp)` reads from the newly exposed page. The probe is a
//     load; nothing is stored.
//   * The remainder (at most 0x1000 bytes) is subtracted and probed once more.
//   * `add %rax,%rsp` undoes every decrement, after which `leave` and `ret`
//     return with `%rsp` and `%rax` as they were on entry.
//
// Any changes to this function should be replicated to the SGX version below.
#[cfg(all(
    target_arch = "x86_64",
    not(all(target_env = "sgx", target_vendor = "fortanix"))
))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    pushq %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov %rax,%r11 // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack orginally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp $0x1000,%r11
    jna 3f
2:
    sub $0x1000,%rsp
    test %rsp,8(%rsp)
    sub $0x1000,%r11
    cmp $0x1000,%r11
    ja 2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub %r11,%rsp
    test %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    ret
    .cfi_endproc
    "
    ),
    // The body above is written in AT&T operand order (source, destination).
    options(att_syntax)
);
189 | |
// Stack probe for x86_64 on the Fortanix SGX target.
//
// This function is the same as above, except that some instructions are
// [manually patched for LVI].
//
// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
//
// Concretely, the only difference from the non-SGX body is the return
// sequence: instead of `ret`, the return address is popped into `%r11`, an
// `lfence` is issued, and control transfers with `jmp *%r11`. The ABI is
// otherwise identical: the frame size arrives in `%rax`, and `%rsp` and
// `%rax` are left unchanged apart from the return address being consumed, as
// `ret` would do. `%r11` is clobbered.
//
// NOTE(review): no CFI directive follows `pop %r11`, so between that
// instruction and the `jmp` the recorded CFA offset looks stale by 8 bytes.
// Presumably harmless because nothing unwinds from those two instructions;
// confirm before relying on unwinding through them.
#[cfg(all(
    target_arch = "x86_64",
    all(target_env = "sgx", target_vendor = "fortanix")
))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    pushq %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov %rax,%r11 // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack orginally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp $0x1000,%r11
    jna 3f
2:
    sub $0x1000,%rsp
    test %rsp,8(%rsp)
    sub $0x1000,%r11
    cmp $0x1000,%r11
    ja 2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub %r11,%rsp
    test %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    pop %r11
    lfence
    jmp *%r11
    .cfi_endproc
    "
    ),
    // The body above is written in AT&T operand order (source, destination).
    options(att_syntax)
);
255 | |
// Stack probe for 32-bit x86 on every target except UEFI.
//
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
// that on Unix we're expected to restore everything as it was, this
// function basically can't tamper with anything.
//
// The ABI here is the same as x86_64, except everything is 32-bits large.
//
// Walkthrough of the assembly below:
//   * `%ebp` is pushed and set up as a frame pointer, tracked by the `.cfi_*`
//     directives.
//   * `%ecx` is saved on the stack and then loaded with a copy of `%eax`; it
//     counts the bytes still to probe and is popped back before returning, so
//     no register is left modified.
//   * While more than 0x1000 bytes remain, `%esp` is lowered by 0x1000 and
//     `test %esp,8(%esp)` reads from the newly exposed page.
//   * The remainder is subtracted and probed once more, then `add %eax,%esp`
//     undoes every decrement before `pop %ecx`, `leave` and `ret`.
#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    push %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov %esp, %ebp
    .cfi_def_cfa_register %ebp
    push %ecx
    mov %eax,%ecx

    cmp $0x1000,%ecx
    jna 3f
2:
    sub $0x1000,%esp
    test %esp,8(%esp)
    sub $0x1000,%ecx
    cmp $0x1000,%ecx
    ja 2b

3:
    sub %ecx,%esp
    test %esp,8(%esp)

    add %eax,%esp
    pop %ecx
    leave
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
    ),
    // The body above is written in AT&T operand order (source, destination).
    options(att_syntax)
);
298 | |
// Stack probe for 32-bit x86 on UEFI.
//
// UEFI is a Windows-like target, so LLVM treats the probe like `_chkstk` on
// Windows, and this function has to behave like MSVC's `_chkstk` as well.
// On x86 that means the probe itself must subtract %eax from %esp.
//
// REF: Rust commit(74e80468347)
// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
// Comments in LLVM:
//   MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
//   MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
//   themselves.
//
// Differences from the non-UEFI x86 body, as visible in the assembly below:
//   * `%edx` is saved and restored in addition to `%ecx`; it is used as a
//     scratch register for the return address.
//   * After the last probe, the return address is loaded from `4(%ebp)` and
//     stored to `12(%esp)`. That is the slot `%esp` ends up pointing at once
//     the registers are popped, `leave` has run and `%eax` has been
//     subtracted, so the final `ret` pops the relocated copy.
//   * `sub %eax, %esp` after `leave` makes the function return with the stack
//     pointer already lowered by the requested size.
//
// NOTE(review): for `%eax` values of 4, 8 or 12 the relocated return address
// would land on the saved `%ebp`, `%ecx` or `%edx` slot respectively.
// Presumably callers never request such small sizes; confirm.
#[cfg(all(target_arch = "x86", target_os = "uefi"))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    push %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov %esp, %ebp
    .cfi_def_cfa_register %ebp
    push %ecx
    push %edx
    mov %eax,%ecx

    cmp $0x1000,%ecx
    jna 3f
2:
    sub $0x1000,%esp
    test %esp,8(%esp)
    sub $0x1000,%ecx
    cmp $0x1000,%ecx
    ja 2b

3:
    sub %ecx,%esp
    test %esp,8(%esp)
    mov 4(%ebp),%edx
    mov %edx, 12(%esp)
    add %eax,%esp
    pop %edx
    pop %ecx
    leave

    sub %eax, %esp
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
    ),
    // The body above is written in AT&T operand order (source, destination).
    options(att_syntax)
);
351 | |