| 1 | // Copyright 2017 The Rust Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution and at |
| 3 | // http://rust-lang.org/COPYRIGHT. |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | // option. This file may not be copied, modified, or distributed |
| 9 | // except according to those terms. |
| 10 | |
| 11 | //! This module defines the `__rust_probestack` intrinsic which is used in the |
| 12 | //! implementation of "stack probes" on certain platforms. |
| 13 | //! |
| 14 | //! The purpose of a stack probe is to provide a static guarantee that if a |
| 15 | //! thread has a guard page then a stack overflow is guaranteed to hit that |
| 16 | //! guard page. If a function did not have a stack probe then there's a risk of |
| 17 | //! having a stack frame *larger* than the guard page, so a function call could |
| 18 | //! skip over the guard page entirely and then later hit maybe the heap or |
| 19 | //! another thread, possibly leading to security vulnerabilities such as [The |
| 20 | //! Stack Clash], for example. |
| 21 | //! |
| 22 | //! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash |
| 23 | //! |
| 24 | //! The `__rust_probestack` is called in the prologue of functions whose stack |
| 25 | //! size is larger than the guard page, for example larger than 4096 bytes on |
| 26 | //! x86. This function is then responsible for "touching" all pages relevant to |
| 27 | //! the stack to ensure that if any of them is the guard page we're guaranteed |
| 28 | //! to hit it. |
| 29 | //! |
| 30 | //! The precise ABI for how this function operates is defined by LLVM. There's |
| 31 | //! no real documentation as to what this is, so you'd basically need to read |
| 32 | //! the LLVM source code for reference. Often though the test cases can be |
| 33 | //! illuminating as to the ABI that's generated, or just looking at the output |
| 34 | //! of `llc`. |
| 35 | //! |
| 36 | //! Note that `#[naked]` is typically used here for the stack probe because the |
| 37 | //! ABI corresponds to no actual ABI. |
| 38 | //! |
| 39 | //! Finally it's worth noting that at the time of this writing LLVM only has |
| 40 | //! support for stack probes on x86 and x86_64. There's no support for stack |
| 41 | //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would |
| 42 | //! be more than welcome to accept such a change! |
| 43 | |
| 44 | #![cfg (not(feature = "mangled-names" ))] |
| 45 | // Windows and Cygwin already have builtins to do this. |
| 46 | #![cfg (not(any(windows, target_os = "cygwin" )))] |
| 47 | // All these builtins require assembly |
| 48 | #![cfg (not(feature = "no-asm" ))] |
| 49 | // We only define stack probing for these architectures today. |
| 50 | #![cfg (any(target_arch = "x86_64" , target_arch = "x86" ))] |
| 51 | |
// Declaration of the stack-probe routine whose body is provided by the
// `global_asm!` blocks later in this file.
unsafe extern "C" {
    /// Stack probe entry point emitted into function prologues.
    ///
    /// # Safety
    ///
    /// Despite the `extern "C"` declaration, the assembly implementations in
    /// this file do not take their argument the C way: the requested frame
    /// size is read from `%rax` (`%eax` on 32-bit x86). It is meant to be
    /// invoked from compiler-generated prologues, not called directly from
    /// Rust code.
    pub unsafe fn __rust_probestack();
}
| 55 | |
// Wraps the assembly implementation of `__rust_probestack` so that it can be
// written as inline (global) assembly while every directive emitted for the
// function, including the CFI directives written in the body, stays under our
// control.
//
// This is the ELF flavour: the body is placed in its own
// `.text.__rust_probestack` section and the symbol is declared as a hidden,
// global function whose size is recorded after the body.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".pushsection .text.__rust_probestack\n",
            ".globl __rust_probestack\n",
            ".type __rust_probestack, @function\n",
            ".hidden __rust_probestack\n",
            "__rust_probestack:\n",
            $asm,
            "\n",
            ".size __rust_probestack, . - __rust_probestack\n",
            ".popsection\n"
        )
    };
}
| 80 | |
// UEFI (x86_64) flavour of the wrapper: only the `.globl` directive and the
// `__rust_probestack` label are emitted ahead of the body; no section, type,
// visibility or size directives are generated.
#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".globl __rust_probestack\n",
            "__rust_probestack:\n",
            $asm
        )
    };
}
| 93 | |
// Mach-O flavour of the wrapper for Apple targets: just the `.globl`
// directive and the label precede the body.
//
// The symbol is spelled with THREE leading underscores on purpose
// (presumably because Mach-O symbol names carry one extra leading underscore;
// confirm before changing).
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".globl ___rust_probestack\n",
            "___rust_probestack:\n",
            $asm
        )
    };
}
| 108 | |
// UEFI (32-bit x86) flavour of the wrapper. As with the Apple variant, the
// symbol deliberately has three leading underscores; only the `.globl`
// directive and the label are emitted ahead of the body.
#[cfg(all(target_os = "uefi", target_arch = "x86"))]
macro_rules! define_rust_probestack {
    ($asm:expr) => {
        concat!(
            "\n",
            ".globl ___rust_probestack\n",
            "___rust_probestack:\n",
            $asm
        )
    };
}
| 122 | |
| 123 | // x86_64 implementation of `__rust_probestack`. Our goal here is to touch each |
| 124 | // page between %rsp+8 and %rsp+8-%rax, so that any unmapped page in that range causes a page fault. |
| 125 | // |
| 126 | // The ABI here is that the requested stack frame size is passed in `%rax`. Upon |
| 127 | // return `%rsp` and `%rax` must hold the values they had on entry; `%r11` is used as the scratch counter and is clobbered. |
| 128 | // |
| 129 | // Any changes to this function should be replicated to the SGX version below. |
| 130 | #[cfg (all( |
| 131 | target_arch = "x86_64" , |
| 132 | not(all(target_env = "sgx" , target_vendor = "fortanix" )) |
| 133 | ))] |
| 134 | core::arch::global_asm!( |
| 135 | define_rust_probestack!( |
| 136 | " |
| 137 | .cfi_startproc |
| 138 | pushq %rbp |
| 139 | .cfi_adjust_cfa_offset 8 |
| 140 | .cfi_offset %rbp, -16 |
| 141 | movq %rsp, %rbp |
| 142 | .cfi_def_cfa_register %rbp |
| 143 | |
| 144 | mov %rax,%r11 // duplicate %rax as we're clobbering %r11 |
| 145 | |
| 146 | // Main loop, taken in one page increments. We're decrementing rsp by |
| 147 | // a page each time until there's less than a page remaining. We're |
| 148 | // guaranteed that this function isn't called unless there's more than a |
| 149 | // page needed. |
| 150 | // |
| 151 | // Note that we're also testing against `8(%rsp)` to account for the 8 |
| 152 | // bytes pushed on the stack orginally with our return address. Using |
| 153 | // `8(%rsp)` simulates us testing the stack pointer in the caller's |
| 154 | // context. |
| 155 | |
| 156 | // It's usually called when %rax >= 0x1000, but that's not always true. |
| 157 | // Dynamic stack allocation, which is needed to implement unsized |
| 158 | // rvalues, triggers stackprobe even if %rax < 0x1000. |
| 159 | // Thus we have to check %r11 first to avoid segfault. |
| 160 | cmp $0x1000,%r11 |
| 161 | jna 3f |
| 162 | 2: |
| 163 | sub $0x1000,%rsp |
| 164 | test %rsp,8(%rsp) |
| 165 | sub $0x1000,%r11 |
| 166 | cmp $0x1000,%r11 |
| 167 | ja 2b |
| 168 | |
| 169 | 3: |
| 170 | // Finish up the last remaining stack space requested, getting the last |
| 171 | // bits out of r11 |
| 172 | sub %r11,%rsp |
| 173 | test %rsp,8(%rsp) |
| 174 | |
| 175 | // Restore the stack pointer to what it previously was when entering |
| 176 | // this function. The caller will readjust the stack pointer after we |
| 177 | // return. |
| 178 | add %rax,%rsp |
| 179 | |
| 180 | leave |
| 181 | .cfi_def_cfa_register %rsp |
| 182 | .cfi_adjust_cfa_offset -8 |
| 183 | ret |
| 184 | .cfi_endproc |
| 185 | " |
| 186 | ), |
| 187 | options(att_syntax) |
| 188 | ); |
| 189 | |
| 190 | // SGX (Fortanix) variant of the x86_64 probe above. The probing sequence is the same; |
| 191 | // the final `ret` is [manually patched for LVI] and replaced by `pop %r11; lfence; jmp *%r11`. |
| 192 | // |
| 193 | // [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions |
| 194 | #[cfg (all( |
| 195 | target_arch = "x86_64" , |
| 196 | all(target_env = "sgx" , target_vendor = "fortanix" ) |
| 197 | ))] |
| 198 | core::arch::global_asm!( |
| 199 | define_rust_probestack!( |
| 200 | " |
| 201 | .cfi_startproc |
| 202 | pushq %rbp |
| 203 | .cfi_adjust_cfa_offset 8 |
| 204 | .cfi_offset %rbp, -16 |
| 205 | movq %rsp, %rbp |
| 206 | .cfi_def_cfa_register %rbp |
| 207 | |
| 208 | mov %rax,%r11 // duplicate %rax as we're clobbering %r11 |
| 209 | |
| 210 | // Main loop, taken in one page increments. We're decrementing rsp by |
| 211 | // a page each time until there's less than a page remaining. We're |
| 212 | // guaranteed that this function isn't called unless there's more than a |
| 213 | // page needed. |
| 214 | // |
| 215 | // Note that we're also testing against `8(%rsp)` to account for the 8 |
| 216 | // bytes pushed on the stack orginally with our return address. Using |
| 217 | // `8(%rsp)` simulates us testing the stack pointer in the caller's |
| 218 | // context. |
| 219 | |
| 220 | // It's usually called when %rax >= 0x1000, but that's not always true. |
| 221 | // Dynamic stack allocation, which is needed to implement unsized |
| 222 | // rvalues, triggers stackprobe even if %rax < 0x1000. |
| 223 | // Thus we have to check %r11 first to avoid segfault. |
| 224 | cmp $0x1000,%r11 |
| 225 | jna 3f |
| 226 | 2: |
| 227 | sub $0x1000,%rsp |
| 228 | test %rsp,8(%rsp) |
| 229 | sub $0x1000,%r11 |
| 230 | cmp $0x1000,%r11 |
| 231 | ja 2b |
| 232 | |
| 233 | 3: |
| 234 | // Finish up the last remaining stack space requested, getting the last |
| 235 | // bits out of r11 |
| 236 | sub %r11,%rsp |
| 237 | test %rsp,8(%rsp) |
| 238 | |
| 239 | // Restore the stack pointer to what it previously was when entering |
| 240 | // this function. The caller will readjust the stack pointer after we |
| 241 | // return. |
| 242 | add %rax,%rsp |
| 243 | |
| 244 | leave |
| 245 | .cfi_def_cfa_register %rsp |
| 246 | .cfi_adjust_cfa_offset -8 |
| 247 | pop %r11 |
| 248 | lfence |
| 249 | jmp *%r11 |
| 250 | .cfi_endproc |
| 251 | " |
| 252 | ), |
| 253 | options(att_syntax) |
| 254 | ); |
| 255 | |
| 256 | #[cfg (all(target_arch = "x86" , not(target_os = "uefi" )))] |
| 257 | // 32-bit x86 translation of the x86_64 probe above. Note that on Unix we're |
| 258 | // expected to restore everything as it was, so `%ecx` (used here as the scratch |
| 259 | // counter) is pushed on entry and popped again before returning. |
| 260 | // |
| 261 | // The ABI here is the same as x86_64, except everything is 32 bits wide: the frame size is read from `%eax`. |
| 262 | core::arch::global_asm!( |
| 263 | define_rust_probestack!( |
| 264 | " |
| 265 | .cfi_startproc |
| 266 | push %ebp |
| 267 | .cfi_adjust_cfa_offset 4 |
| 268 | .cfi_offset %ebp, -8 |
| 269 | mov %esp, %ebp |
| 270 | .cfi_def_cfa_register %ebp |
| 271 | push %ecx |
| 272 | mov %eax,%ecx |
| 273 | |
| 274 | cmp $0x1000,%ecx |
| 275 | jna 3f |
| 276 | 2: |
| 277 | sub $0x1000,%esp |
| 278 | test %esp,8(%esp) |
| 279 | sub $0x1000,%ecx |
| 280 | cmp $0x1000,%ecx |
| 281 | ja 2b |
| 282 | |
| 283 | 3: |
| 284 | sub %ecx,%esp |
| 285 | test %esp,8(%esp) |
| 286 | |
| 287 | add %eax,%esp |
| 288 | pop %ecx |
| 289 | leave |
| 290 | .cfi_def_cfa_register %esp |
| 291 | .cfi_adjust_cfa_offset -4 |
| 292 | ret |
| 293 | .cfi_endproc |
| 294 | " |
| 295 | ), |
| 296 | options(att_syntax) |
| 297 | ); |
| 298 | |
| 299 | #[cfg (all(target_arch = "x86" , target_os = "uefi" ))] |
| 300 | // UEFI is a Windows-like target, so LLVM handles the probe the way it handles `_chkstk` on Windows. |
| 301 | // The probestack function therefore has to behave like MSVC's `_chkstk` as well. |
| 302 | // On x86 that means subtracting `%eax` from `%esp` before returning; the return address is first copied to the slot `%esp` will point at afterwards so that `ret` still works. |
| 303 | // |
| 304 | // REF: Rust commit(74e80468347) |
| 305 | // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 |
| 306 | // Comments in LLVM: |
| 307 | // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. |
| 308 | // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp |
| 309 | // themselves. |
| 310 | core::arch::global_asm!( |
| 311 | define_rust_probestack!( |
| 312 | " |
| 313 | .cfi_startproc |
| 314 | push %ebp |
| 315 | .cfi_adjust_cfa_offset 4 |
| 316 | .cfi_offset %ebp, -8 |
| 317 | mov %esp, %ebp |
| 318 | .cfi_def_cfa_register %ebp |
| 319 | push %ecx |
| 320 | push %edx |
| 321 | mov %eax,%ecx |
| 322 | |
| 323 | cmp $0x1000,%ecx |
| 324 | jna 3f |
| 325 | 2: |
| 326 | sub $0x1000,%esp |
| 327 | test %esp,8(%esp) |
| 328 | sub $0x1000,%ecx |
| 329 | cmp $0x1000,%ecx |
| 330 | ja 2b |
| 331 | |
| 332 | 3: |
| 333 | sub %ecx,%esp |
| 334 | test %esp,8(%esp) |
| 335 | mov 4(%ebp),%edx |
| 336 | mov %edx, 12(%esp) |
| 337 | add %eax,%esp |
| 338 | pop %edx |
| 339 | pop %ecx |
| 340 | leave |
| 341 | |
| 342 | sub %eax, %esp |
| 343 | .cfi_def_cfa_register %esp |
| 344 | .cfi_adjust_cfa_offset -4 |
| 345 | ret |
| 346 | .cfi_endproc |
| 347 | " |
| 348 | ), |
| 349 | options(att_syntax) |
| 350 | ); |
| 351 | |