// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This module defines the `__rust_probestack` intrinsic which is used in the
//! implementation of "stack probes" on certain platforms.
//!
//! The purpose of a stack probe is to provide a static guarantee that if a
//! thread has a guard page, then a stack overflow is guaranteed to hit that
//! guard page. If a function did not have a stack probe, then a stack frame
//! *larger* than the guard page could skip over the guard page entirely and
//! land in the heap or in another thread's stack, possibly leading to
//! security vulnerabilities such as [The Stack Clash], for example.
//!
//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
//!
//! `__rust_probestack` is called in the prologue of functions whose stack
//! size is larger than the guard page, for example larger than 4096 bytes on
//! x86. The function is then responsible for "touching" every page of the new
//! stack frame so that if any of them is the guard page, we are guaranteed to
//! hit it.
//!
//! The precise ABI of this function is defined by LLVM. There's no real
//! documentation of it, so the LLVM source code is the primary reference.
//! That said, the test cases are often illuminating as to the ABI that's
//! expected, as is the output of `llc`.
//!
//! Note that `#[naked]` is typically used for the stack probe because its ABI
//! does not correspond to any real calling convention.
//!
//! Finally, it's worth noting that at the time of this writing LLVM only
//! supports stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture such as ARM or PowerPC64, though LLVM
//! would surely welcome such a change!
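//!
//! For illustration, a function along the lines of the one below (the name,
//! buffer size, and use of `black_box` are made up for this example) would
//! get a call to `__rust_probestack` inserted into its prologue by the
//! compiler, since its frame exceeds a 4096-byte guard page:
//!
//! ```ignore
//! fn has_a_big_frame() {
//!     // An 8 KiB local exceeds a 4 KiB guard page, so the prologue must
//!     // probe each page of the frame before the frame is used.
//!     let buf = [0u8; 8192];
//!     core::hint::black_box(&buf);
//! }
//! ```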

#![cfg(not(feature = "mangled-names"))]
// Windows already has builtins to do this.
#![cfg(not(windows))]
// All these builtins require assembly.
#![cfg(not(feature = "no-asm"))]
// We only define stack probing for these architectures today.
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]

extern "C" {
    pub fn __rust_probestack();
}

// A wrapper for our implementation of __rust_probestack, which allows us to
// keep the assembly inline while controlling all CFI directives in the
// assembly emitted for the function.
//
// This is the ELF version.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .pushsection .text.__rust_probestack
            .globl __rust_probestack
            .type __rust_probestack, @function
            .hidden __rust_probestack
            __rust_probestack:
            ",
            $body,
            "
            .size __rust_probestack, . - __rust_probestack
            .popsection
            "
        )
    };
}

#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl __rust_probestack
            __rust_probestack:
            ",
            $body
        )
    };
}

// Same as above, but for Mach-O. Note that the triple underscore
// is deliberate.
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
            ___rust_probestack:
            ",
            $body
        )
    };
}

// On x86 UEFI, the triple underscore is likewise deliberate.
#[cfg(all(target_os = "uefi", target_arch = "x86"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
            ___rust_probestack:
            ",
            $body
        )
    };
}

// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
// ensuring that if any of those pages are unmapped we'll take a page fault.
//
// The ABI here is that the stack frame size is located in `%rax`. Upon
// return we're not supposed to modify `%rsp` or `%rax`.
//
// Any changes to this function should be replicated to the SGX version below.
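//
// For illustration only, here is a rough Rust-flavored sketch of the probing
// loop implemented by the assembly below. It is not part of the build:
// `sp` and `frame_size` are made-up names standing in for %rsp and %rax, and
// the real code must additionally preserve %rsp/%rax and probe at an offset
// of 8 bytes to account for the pushed return address.
//
//     const PAGE_SIZE: usize = 0x1000;
//     let mut remaining = frame_size;                     // kept in %r11
//     while remaining > PAGE_SIZE {
//         sp -= PAGE_SIZE;                                // move down a page...
//         unsafe { (sp as *const u8).read_volatile(); }   // ...and touch it
//         remaining -= PAGE_SIZE;
//     }
//     sp -= remaining;                                    // final, sub-page step
//     unsafe { (sp as *const u8).read_volatile(); }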
#[cfg(all(
    target_arch = "x86_64",
    not(all(target_env = "sgx", target_vendor = "fortanix"))
))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
        .cfi_startproc
        pushq %rbp
        .cfi_adjust_cfa_offset 8
        .cfi_offset %rbp, -16
        movq %rsp, %rbp
        .cfi_def_cfa_register %rbp

        mov %rax,%r11           // duplicate %rax as we're clobbering %r11

        // Main loop, taken in one-page increments. We're decrementing rsp by
        // a page each time until there's less than a page remaining. We're
        // guaranteed that this function isn't called unless there's more than a
        // page needed.
        //
        // Note that we're also testing against `8(%rsp)` to account for the 8
        // bytes pushed on the stack originally with our return address. Using
        // `8(%rsp)` simulates us testing the stack pointer in the caller's
        // context.

        // It's usually called when %rax >= 0x1000, but that's not always true.
        // Dynamic stack allocation, which is needed to implement unsized
        // rvalues, triggers a stack probe even if %rax < 0x1000.
        // Thus we have to check %r11 first to avoid a segfault.
        cmp $0x1000,%r11
        jna 3f
    2:
        sub $0x1000,%rsp
        test %rsp,8(%rsp)
        sub $0x1000,%r11
        cmp $0x1000,%r11
        ja 2b

    3:
        // Finish up the last remaining stack space requested, getting the last
        // bits out of r11.
        sub %r11,%rsp
        test %rsp,8(%rsp)

        // Restore the stack pointer to what it previously was when entering
        // this function. The caller will readjust the stack pointer after we
        // return.
        add %rax,%rsp

        leave
        .cfi_def_cfa_register %rsp
        .cfi_adjust_cfa_offset -8
        ret
        .cfi_endproc
        "
    ),
    options(att_syntax)
);

// This function is the same as above, except that some instructions are
// [manually patched for LVI].
//
// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
#[cfg(all(
    target_arch = "x86_64",
    all(target_env = "sgx", target_vendor = "fortanix")
))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
        .cfi_startproc
        pushq %rbp
        .cfi_adjust_cfa_offset 8
        .cfi_offset %rbp, -16
        movq %rsp, %rbp
        .cfi_def_cfa_register %rbp

        mov %rax,%r11           // duplicate %rax as we're clobbering %r11

        // Main loop, taken in one-page increments. We're decrementing rsp by
        // a page each time until there's less than a page remaining. We're
        // guaranteed that this function isn't called unless there's more than a
        // page needed.
        //
        // Note that we're also testing against `8(%rsp)` to account for the 8
        // bytes pushed on the stack originally with our return address. Using
        // `8(%rsp)` simulates us testing the stack pointer in the caller's
        // context.

        // It's usually called when %rax >= 0x1000, but that's not always true.
        // Dynamic stack allocation, which is needed to implement unsized
        // rvalues, triggers a stack probe even if %rax < 0x1000.
        // Thus we have to check %r11 first to avoid a segfault.
        cmp $0x1000,%r11
        jna 3f
    2:
        sub $0x1000,%rsp
        test %rsp,8(%rsp)
        sub $0x1000,%r11
        cmp $0x1000,%r11
        ja 2b

    3:
        // Finish up the last remaining stack space requested, getting the last
        // bits out of r11.
        sub %r11,%rsp
        test %rsp,8(%rsp)

        // Restore the stack pointer to what it previously was when entering
        // this function. The caller will readjust the stack pointer after we
        // return.
        add %rax,%rsp

        leave
        .cfi_def_cfa_register %rsp
        .cfi_adjust_cfa_offset -8
        // LVI mitigation: the plain `ret` used in the non-SGX version above is
        // replaced with an explicit pop of the return address, an `lfence`,
        // and an indirect jump.
        pop %r11
        lfence
        jmp *%r11
        .cfi_endproc
        "
    ),
    options(att_syntax)
);

#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
// This is the same as the x86_64 version above, only translated for 32-bit
// sizes. Note that on Unix we're expected to restore everything as it was, so
// this function basically can't tamper with anything.
//
// The ABI here is the same as x86_64, except everything is 32 bits large.
core::arch::global_asm!(
    define_rust_probestack!(
        "
        .cfi_startproc
        push %ebp
        .cfi_adjust_cfa_offset 4
        .cfi_offset %ebp, -8
        mov %esp, %ebp
        .cfi_def_cfa_register %ebp
        push %ecx
        mov %eax,%ecx

        cmp $0x1000,%ecx
        jna 3f
    2:
        sub $0x1000,%esp
        test %esp,8(%esp)
        sub $0x1000,%ecx
        cmp $0x1000,%ecx
        ja 2b

    3:
        sub %ecx,%esp
        test %esp,8(%esp)

        add %eax,%esp
        pop %ecx
        leave
        .cfi_def_cfa_register %esp
        .cfi_adjust_cfa_offset -4
        ret
        .cfi_endproc
        "
    ),
    options(att_syntax)
);

#[cfg(all(target_arch = "x86", target_os = "uefi"))]
// The UEFI target is a Windows-like target, so LLVM emits `_chkstk`-style
// probe calls for it, and the probestack function is expected to behave like
// MSVC's `_chkstk` does. On x86 that means the probestack function itself
// must subtract %eax from %esp rather than leaving that to the caller.
//
// REF: Rust commit(74e80468347)
// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
// Comments in LLVM:
// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themselves.
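//
// For illustration (paraphrased from the comments above and the x86_64
// comments earlier, not verbatim compiler output), the difference in what the
// generated prologue expects is roughly:
//
//     // non-UEFI targets above:
//     call __rust_probestack      // probe only; %esp is unchanged on return
//     sub %eax, %esp              // the caller allocates the frame itself
//
//     // this Windows-like target:
//     call ___rust_probestack     // probe *and* allocate; callee adjusts %esp
//
// which is why this version ends with `sub %eax, %esp` before returning.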
core::arch::global_asm!(
    define_rust_probestack!(
        "
        .cfi_startproc
        push %ebp
        .cfi_adjust_cfa_offset 4
        .cfi_offset %ebp, -8
        mov %esp, %ebp
        .cfi_def_cfa_register %ebp
        push %ecx
        push %edx
        mov %eax,%ecx

        cmp $0x1000,%ecx
        jna 3f
    2:
        sub $0x1000,%esp
        test %esp,8(%esp)
        sub $0x1000,%ecx
        cmp $0x1000,%ecx
        ja 2b

    3:
        sub %ecx,%esp
        test %esp,8(%esp)
        // Copy the return address down to where %esp will point after the
        // final `sub %eax, %esp` below, so that `ret` still finds it.
        mov 4(%ebp),%edx
        mov %edx, 12(%esp)
        add %eax,%esp
        pop %edx
        pop %ecx
        leave

        // Unlike the other versions, adjust %esp ourselves (_chkstk-style).
        sub %eax, %esp
        .cfi_def_cfa_register %esp
        .cfi_adjust_cfa_offset -4
        ret
        .cfi_endproc
        "
    ),
    options(att_syntax)
);