probestack.rs source code [crates/compiler_builtins/src/probestack.rs]

1	// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2	// file at the top-level directory of this distribution and at
3	// http://rust-lang.org/COPYRIGHT.
4	//
5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8	// option. This file may not be copied, modified, or distributed
9	// except according to those terms.
10
11	//! This module defines the `__rust_probestack` intrinsic which is used in the
12	//! implementation of "stack probes" on certain platforms.
13	//!
14	//! The purpose of a stack probe is to provide a static guarantee that if a
15	//! thread has a guard page then a stack overflow is guaranteed to hit that
16	//! guard page. If a function did not have a stack probe then there's a risk of
17	//! having a stack frame *larger* than the guard page, so a function call *could*
18	//! skip over the guard page entirely and then later hit maybe the heap or
19	//! another thread, possibly leading to security vulnerabilities such as [The
20	//! Stack Clash], for example.
21	//!
22	//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
23	//!
24	//! The `__rust_probestack` is called in the prologue of functions whose stack
25	//! size is larger than the guard page, for example larger than 4096 bytes on
26	//! x86. This function is then responsible for "touching" all pages relevant to
27	//! the stack to ensure that if any of them is the guard page we are
28	//! guaranteed to hit it.
29	//!
30	//! The precise ABI for how this function operates is defined by LLVM. There's
31	//! no real documentation as to what this is, so you'd basically need to read
32	//! the LLVM source code for reference. Often though the test cases can be
33	//! illuminating as to the ABI that's generated, or just looking at the output
34	//! of `llc`.
35	//!
36	//! Note that `#[naked]` is typically used here for the stack probe because the
37	//! ABI corresponds to no actual ABI.
38	//!
39	//! Finally it's worth noting that at the time of this writing LLVM only has
40	//! support for stack probes on x86 and x86_64. There's no support for stack
41	//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
42	//! be more than welcome to accept such a change!
43
44	#![cfg(not(feature = "mangled-names"))]
45	// Windows already has builtins to do this.
46	#![cfg(not(windows))]
47	// All these builtins require assembly
48	#![cfg(not(feature = "no-asm"))]
49	// We only define stack probing for these architectures today.
50	#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
51
// Declares the `__rust_probestack` symbol so that Rust code can refer to it.
// No body is provided here: the symbol is emitted by the `global_asm!` blocks
// later in this file through the `define_rust_probestack!` macro.
//
// The empty parameter list does not describe how the routine is really
// called; per the comments on the assembly in this file, the requested frame
// size is passed in `%rax` (`%eax` on 32-bit x86).
52	extern "C" {
53	pub fn __rust_probestack();
54	}
55
56	// A wrapper for our implementation of __rust_probestack, which allows us to
57	// keep the assembly inline while controlling all CFI directives in the assembly
58	// emitted for the function.
59	//
60	// This is the ELF version.
//
// `$body` is the assembly text of the routine. The macro expands to one
// `concat!`-ed string made of three parts, in this order:
//   1. a prologue of assembler directives ending in the `__rust_probestack:`
//      label,
//   2. `$body` itself,
//   3. an epilogue of assembler directives.
//
// This definition is selected for every target that is neither an Apple
// target nor a UEFI target (see the `cfg` below); those have their own
// definitions of the same macro elsewhere in this file.
61	#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
62	macro_rules! define_rust_probestack {
63	($body: expr) => {
64	concat!(
// Prologue: switch to a dedicated `.text.__rust_probestack` section and
// declare the symbol as a global, hidden, function-typed symbol before
// defining its label.
65	"
66	.pushsection .text.__rust_probestack
67	.globl __rust_probestack
68	.type __rust_probestack, @function
69	.hidden __rust_probestack
70	__rust_probestack:
71	",
// The caller-supplied instructions go directly after the label.
72	$body,
// Epilogue: record the symbol's size (current location minus the label) and
// leave the section entered by `.pushsection` above.
73	"
74	.size __rust_probestack, . - __rust_probestack
75	.popsection
76	"
77	)
78	};
79	}
80
// UEFI x86_64 version of `define_rust_probestack!`.
//
// Expands to a single string that exports the `__rust_probestack` label as a
// global symbol and then appends `$body` (the assembly text of the routine).
// Unlike the ELF version, no section, type, visibility or size directives are
// emitted here.
81	#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
82	macro_rules! define_rust_probestack {
83	($body: expr) => {
84	concat!(
85	"
86	.globl __rust_probestack
87	__rust_probestack:
88	",
// The caller-supplied instructions follow the label; nothing is appended
// after them.
89	$body
90	)
91	};
92	}
93
94	// Same as above, but for Mach-O. Note that the triple underscore
95	// is deliberate
//
// Expands to a single string that exports the `___rust_probestack` label as a
// global symbol and then appends `$body` (the assembly text of the routine).
// NOTE(review): the extra leading underscore presumably accounts for the
// platform's symbol-name prefixing, so that the label matches what references
// to `__rust_probestack` resolve to -- confirm against the toolchain.
96	#[cfg(target_vendor = "apple")]
97	macro_rules! define_rust_probestack {
98	($body: expr) => {
99	concat!(
100	"
101	.globl ___rust_probestack
102	___rust_probestack:
103	",
// The caller-supplied instructions follow the label; nothing is appended
// after them.
104	$body
105	)
106	};
107	}
108
109	// In UEFI x86 arch, triple underscore is deliberate.
//
// Expands to a single string that exports the `___rust_probestack` label as a
// global symbol and then appends `$body` (the assembly text of the routine).
// The emitted text is the same as in the Apple version of this macro; only
// the `cfg` that selects it differs (32-bit x86 UEFI targets).
110	#[cfg(all(target_os = "uefi", target_arch = "x86"))]
111	macro_rules! define_rust_probestack {
112	($body: expr) => {
113	concat!(
114	"
115	.globl ___rust_probestack
116	___rust_probestack:
117	",
// The caller-supplied instructions follow the label; nothing is appended
// after them.
118	$body
119	)
120	};
121	}
122
123	// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
124	// ensuring that if any pages are unmapped we'll make a page fault.
125	//
126	// The ABI here is that the stack frame size is located in `%rax`. Upon
127	// return we're not supposed to modify `%rsp` or `%rax`.
128	//
129	// Any changes to this function should be replicated to the SGX version below.
//
// Outline of the assembly below (AT&T operand order: source, destination):
//   * Set up a frame with `%rbp` and copy the requested size from `%rax` into
//     the scratch register `%r11`; `%rax` itself is only ever read.
//   * If `%r11` is not above 0x1000 (`jna`, unsigned), skip the loop.
//   * Loop at label `2`: lower `%rsp` by 0x1000, read the memory at `8(%rsp)`
//     with `test` (a read-only access that only sets flags), subtract 0x1000
//     from `%r11`, and repeat while `%r11` is still above 0x1000 (`ja`).
//   * Label `3`: lower `%rsp` by whatever is left in `%r11` and read
//     `8(%rsp)` once more.
//   * Add `%rax` back to `%rsp`, undo the frame with `leave`, and `ret`.
// The `.cfi_*` directives describe the `%rbp` push and the switch of the CFA
// register to `%rbp` and back to `%rsp`.
//
// Compiled only for x86_64 targets that are not Fortanix SGX (see the `cfg`).
130	#[cfg(all(
131	target_arch = "x86_64",
132	not(all(target_env = "sgx", target_vendor = "fortanix"))
133	))]
134	core::arch::global_asm!(
135	define_rust_probestack!(
136	"
137	.cfi_startproc
138	pushq %rbp
139	.cfi_adjust_cfa_offset 8
140	.cfi_offset %rbp, -16
141	movq %rsp, %rbp
142	.cfi_def_cfa_register %rbp
143
144	mov %rax,%r11 // duplicate %rax as we're clobbering %r11
145
146	// Main loop, taken in one page increments. We're decrementing rsp by
147	// a page each time until there's less than a page remaining. We're
148	// guaranteed that this function isn't called unless there's more than a
149	// page needed.
150	//
151	// Note that we're also testing against `8(%rsp)` to account for the 8
152	// bytes pushed on the stack orginally with our return address. Using
153	// `8(%rsp)` simulates us testing the stack pointer in the caller's
154	// context.
155
156	// It's usually called when %rax >= 0x1000, but that's not always true.
157	// Dynamic stack allocation, which is needed to implement unsized
158	// rvalues, triggers stackprobe even if %rax < 0x1000.
159	// Thus we have to check %r11 first to avoid segfault.
160	cmp $0x1000,%r11
161	jna 3f
162	2:
163	sub $0x1000,%rsp
164	test %rsp,8(%rsp)
165	sub $0x1000,%r11
166	cmp $0x1000,%r11
167	ja 2b
168
169	3:
170	// Finish up the last remaining stack space requested, getting the last
171	// bits out of r11
172	sub %r11,%rsp
173	test %rsp,8(%rsp)
174
175	// Restore the stack pointer to what it previously was when entering
176	// this function. The caller will readjust the stack pointer after we
177	// return.
178	add %rax,%rsp
179
180	leave
181	.cfi_def_cfa_register %rsp
182	.cfi_adjust_cfa_offset -8
183	ret
184	.cfi_endproc
185	"
186	),
// The assembly text above is written in AT&T syntax.
187	options(att_syntax)
188	);
189
190	// This function is the same as above, except that some instructions are
191	// [manually patched for LVI].
192	//
193	// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
//
// Compared with the non-SGX x86_64 version in this file, the only difference
// in the instruction stream is the return sequence at the end: instead of
// `ret`, the return address is popped into `%r11`, an `lfence` is executed,
// and control returns through an indirect `jmp *%r11`. Everything up to and
// including `leave` is identical, so the probing logic is:
//   * copy the requested size from `%rax` into `%r11`;
//   * while `%r11` is above 0x1000, lower `%rsp` by 0x1000, read `8(%rsp)`
//     with `test`, and subtract 0x1000 from `%r11`;
//   * lower `%rsp` by the remainder in `%r11`, read `8(%rsp)` once more;
//   * add `%rax` back to `%rsp` and tear down the `%rbp` frame.
//
// Compiled only for x86_64 Fortanix SGX targets (see the `cfg`).
194	#[cfg(all(
195	target_arch = "x86_64",
196	all(target_env = "sgx", target_vendor = "fortanix")
197	))]
198	core::arch::global_asm!(
199	define_rust_probestack!(
200	"
201	.cfi_startproc
202	pushq %rbp
203	.cfi_adjust_cfa_offset 8
204	.cfi_offset %rbp, -16
205	movq %rsp, %rbp
206	.cfi_def_cfa_register %rbp
207
208	mov %rax,%r11 // duplicate %rax as we're clobbering %r11
209
210	// Main loop, taken in one page increments. We're decrementing rsp by
211	// a page each time until there's less than a page remaining. We're
212	// guaranteed that this function isn't called unless there's more than a
213	// page needed.
214	//
215	// Note that we're also testing against `8(%rsp)` to account for the 8
216	// bytes pushed on the stack orginally with our return address. Using
217	// `8(%rsp)` simulates us testing the stack pointer in the caller's
218	// context.
219
220	// It's usually called when %rax >= 0x1000, but that's not always true.
221	// Dynamic stack allocation, which is needed to implement unsized
222	// rvalues, triggers stackprobe even if %rax < 0x1000.
223	// Thus we have to check %r11 first to avoid segfault.
224	cmp $0x1000,%r11
225	jna 3f
226	2:
227	sub $0x1000,%rsp
228	test %rsp,8(%rsp)
229	sub $0x1000,%r11
230	cmp $0x1000,%r11
231	ja 2b
232
233	3:
234	// Finish up the last remaining stack space requested, getting the last
235	// bits out of r11
236	sub %r11,%rsp
237	test %rsp,8(%rsp)
238
239	// Restore the stack pointer to what it previously was when entering
240	// this function. The caller will readjust the stack pointer after we
241	// return.
242	add %rax,%rsp
243
244	leave
245	.cfi_def_cfa_register %rsp
246	.cfi_adjust_cfa_offset -8
247	pop %r11
248	lfence
249	jmp *%r11
250	.cfi_endproc
251	"
252	),
// The assembly text above is written in AT&T syntax.
253	options(att_syntax)
254	);
255
256	#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
257	// This is the same as x86_64 above, only translated for 32-bit sizes. Note
258	// that on Unix we're expected to restore everything as it was, this
259	// function basically can't tamper with anything.
260	//
261	// The ABI here is the same as x86_64, except everything is 32-bits large.
//
// Outline of the assembly below (AT&T operand order: source, destination):
//   * Set up a frame with `%ebp`, then save `%ecx` on the stack because it is
//     used as the scratch counter; it is popped again before returning.
//   * Copy the requested size from `%eax` into `%ecx`; `%eax` is only read.
//   * If `%ecx` is not above 0x1000 (`jna`, unsigned), skip the loop.
//   * Loop at label `2`: lower `%esp` by 0x1000, read `8(%esp)` with `test`,
//     subtract 0x1000 from `%ecx`, and repeat while `%ecx` is above 0x1000.
//   * Label `3`: lower `%esp` by the remainder in `%ecx` and read `8(%esp)`
//     once more.
//   * Add `%eax` back to `%esp`, restore `%ecx`, `leave`, and `ret`.
//
// Compiled only for 32-bit x86 targets other than UEFI (see the `cfg` above).
262	core::arch::global_asm!(
263	define_rust_probestack!(
264	"
265	.cfi_startproc
266	push %ebp
267	.cfi_adjust_cfa_offset 4
268	.cfi_offset %ebp, -8
269	mov %esp, %ebp
270	.cfi_def_cfa_register %ebp
271	push %ecx
272	mov %eax,%ecx
273
274	cmp $0x1000,%ecx
275	jna 3f
276	2:
277	sub $0x1000,%esp
278	test %esp,8(%esp)
279	sub $0x1000,%ecx
280	cmp $0x1000,%ecx
281	ja 2b
282
283	3:
284	sub %ecx,%esp
285	test %esp,8(%esp)
286
287	add %eax,%esp
288	pop %ecx
289	leave
290	.cfi_def_cfa_register %esp
291	.cfi_adjust_cfa_offset -4
292	ret
293	.cfi_endproc
294	"
295	),
// The assembly text above is written in AT&T syntax.
296	options(att_syntax)
297	);
298
299	#[cfg(all(target_arch = "x86", target_os = "uefi"))]
300	// UEFI target is windows like target. LLVM will do _chkstk things like windows.
301	// probestack function will also do things like _chkstk in MSVC.
302	// So we need to sub %ax %sp in probestack when arch is x86.
303	//
304	// REF: Rust commit(74e80468347)
305	// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
306	// Comments in LLVM:
307	// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
308	// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
309	// themselves.
//
// Outline of the assembly below (AT&T operand order: source, destination).
// The probing part matches the non-UEFI x86 version in this file; the
// differences are that `%edx` is saved as a second scratch register and that
// the routine returns with `%esp` already lowered by `%eax`:
//   * Set up a frame with `%ebp`, save `%ecx` and `%edx`, and copy the
//     requested size from `%eax` into `%ecx`.
//   * While `%ecx` is above 0x1000, lower `%esp` by 0x1000, read `8(%esp)`
//     with `test`, and subtract 0x1000 from `%ecx`; then lower `%esp` by the
//     remainder and read `8(%esp)` once more.
//   * Copy the return address (`4(%ebp)`) through `%edx` into `12(%esp)`.
//     At this point `%esp` is `%ebp - 8 - %eax`, so that slot is
//     `%ebp + 4 - %eax`: exactly where `%esp` points after the final
//     `sub %eax, %esp` below.
//   * Add `%eax` back to `%esp`, restore `%edx` and `%ecx`, and `leave`.
//   * Lower `%esp` by `%eax` and `ret`, which pops the copied return address
//     and leaves `%esp` at the caller's value minus `%eax`.
310	core::arch::global_asm!(
311	define_rust_probestack!(
312	"
313	.cfi_startproc
314	push %ebp
315	.cfi_adjust_cfa_offset 4
316	.cfi_offset %ebp, -8
317	mov %esp, %ebp
318	.cfi_def_cfa_register %ebp
319	push %ecx
320	push %edx
321	mov %eax,%ecx
322
323	cmp $0x1000,%ecx
324	jna 3f
325	2:
326	sub $0x1000,%esp
327	test %esp,8(%esp)
328	sub $0x1000,%ecx
329	cmp $0x1000,%ecx
330	ja 2b
331
332	3:
333	sub %ecx,%esp
334	test %esp,8(%esp)
335	mov 4(%ebp),%edx
336	mov %edx, 12(%esp)
337	add %eax,%esp
338	pop %edx
339	pop %ecx
340	leave
341
342	sub %eax, %esp
343	.cfi_def_cfa_register %esp
344	.cfi_adjust_cfa_offset -4
345	ret
346	.cfi_endproc
347	"
348	),
// The assembly text above is written in AT&T syntax.
349	options(att_syntax)
350	);
351