//! A library to help grow the stack when it runs out of space.
//!
//! This is an implementation of manually instrumented segmented stacks where points in a program's
//! control flow are annotated with "maybe grow the stack here". Each point of annotation indicates
//! how far away from the end of the stack it's allowed to be, plus the amount of stack to allocate
//! if it does reach the end.
//!
//! Once a program has reached the end of its stack, a temporary stack on the heap is allocated and
//! is switched to for the duration of a closure.
//!
//! For a set of lower-level primitives, consider the `psm` crate.
//!
//! # Examples
//!
//! ```
//! // Grow the stack if we are within the "red zone" of 32K, and if we allocate
//! // a new stack allocate 1MB of stack space.
//! //
//! // If we're already in bounds, just run the provided closure on the current stack.
//! stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
//!     // guaranteed to have at least 32K of stack
//! });
//! ```

#![allow(improper_ctypes)]

#[macro_use]
extern crate cfg_if;
extern crate libc;
#[cfg(windows)]
extern crate winapi;
#[macro_use]
extern crate psm;

use std::cell::Cell;

/// Grows the call stack if necessary.
///
/// This function is intended to be called at manually instrumented points in a program where
/// recursion is known to happen quite a bit. This function will check to see if we're within
/// `red_zone` bytes of the end of the stack, and if so it will allocate a new stack of at least
/// `stack_size` bytes.
///
/// The closure `callback` is guaranteed to run on a stack with at least `red_zone` bytes, and it
/// will be run on the current stack if there's space available.
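///
/// # Examples
///
/// A minimal usage sketch (the recursive helper and the 32K/1MB figures are arbitrary choices
/// for illustration):
///
/// ```
/// fn count_down(n: usize) -> usize {
///     // Request at least 1MB of fresh stack whenever fewer than 32K bytes remain.
///     stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
///         if n == 0 { 0 } else { 1 + count_down(n - 1) }
///     })
/// }
///
/// assert_eq!(count_down(1024), 1024);
/// ```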
#[inline(always)]
pub fn maybe_grow<R, F: FnOnce() -> R>(red_zone: usize, stack_size: usize, callback: F) -> R {
    // If we can't guess the remaining stack (unsupported on some platforms), we immediately grow
    // the stack and then cache the new stack size (which we do know now because we allocated it).
    let enough_space = match remaining_stack() {
        Some(remaining) => remaining >= red_zone,
        None => false,
    };
    if enough_space {
        callback()
    } else {
        grow(stack_size, callback)
    }
}

/// Always creates a new stack for the passed closure to run on.
/// The closure will still be on the same thread as the caller of `grow`.
/// This will allocate a new stack with at least `stack_size` bytes.
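///
/// # Examples
///
/// A minimal sketch; the 1MB size and the computation inside the closure are arbitrary choices
/// for illustration:
///
/// ```
/// // Run the closure on a freshly allocated stack of at least 1MB.
/// let v = stacker::grow(1024 * 1024, || {
///     (0..100u64).sum::<u64>()
/// });
/// assert_eq!(v, 4950);
/// ```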
pub fn grow<R, F: FnOnce() -> R>(stack_size: usize, callback: F) -> R {
    // To avoid monomorphizing `_grow()` and everything it calls,
    // we convert the generic callback to a dynamic one.
    let mut opt_callback = Some(callback);
    let mut ret = None;
    let ret_ref = &mut ret;

    // This wrapper around `callback` achieves two things:
    // * It converts the `impl FnOnce` to a `dyn FnMut`.
    //   `dyn` because we want it to not be generic, and
    //   `FnMut` because we can't pass a `dyn FnOnce` around without boxing it.
    // * It eliminates the generic return value, by writing it to the stack of this function.
    //   Otherwise the closure would have to return an unsized value, which isn't possible.
    let dyn_callback: &mut dyn FnMut() = &mut || {
        let taken_callback = opt_callback.take().unwrap();
        *ret_ref = Some(taken_callback());
    };

    _grow(stack_size, dyn_callback);
    ret.unwrap()
}

/// Queries the amount of remaining stack as interpreted by this library.
///
/// This function will return the amount of stack space left which will be used
/// to determine whether a stack switch should be made or not.
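///
/// # Examples
///
/// A small sketch of how the estimate might be inspected (it is `None` on platforms where the
/// stack limit cannot be determined):
///
/// ```
/// match stacker::remaining_stack() {
///     Some(bytes) => println!("roughly {} bytes of stack remain", bytes),
///     None => println!("remaining stack is unknown on this platform"),
/// }
/// ```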
pub fn remaining_stack() -> Option<usize> {
    let current_ptr = current_stack_ptr();
    get_stack_limit().map(|limit| current_ptr - limit)
}

psm_stack_information! (
    yes {
        fn current_stack_ptr() -> usize {
            psm::stack_pointer() as usize
        }
    }
    no {
        #[inline(always)]
        fn current_stack_ptr() -> usize {
            unsafe {
                let mut x = std::mem::MaybeUninit::<u8>::uninit();
                // Unlikely to be ever exercised. As a fallback we execute a volatile write to a
                // local (to hopefully defeat the optimisations that would make this local a static
                // global) and take its address. This way we get a very approximate address of the
                // current frame.
                x.as_mut_ptr().write_volatile(42);
                x.as_ptr() as usize
            }
        }
    }
);

thread_local! {
    static STACK_LIMIT: Cell<Option<usize>> = Cell::new(unsafe {
        guess_os_stack_limit()
    })
}

#[inline(always)]
fn get_stack_limit() -> Option<usize> {
    STACK_LIMIT.with(|s| s.get())
}

#[inline(always)]
#[allow(unused)]
fn set_stack_limit(l: Option<usize>) {
    STACK_LIMIT.with(|s| s.set(l))
}

psm_stack_manipulation! {
    yes {
        struct StackRestoreGuard {
            new_stack: *mut std::ffi::c_void,
            stack_bytes: usize,
            old_stack_limit: Option<usize>,
        }

        impl StackRestoreGuard {
            #[cfg(target_arch = "wasm32")]
            unsafe fn new(stack_bytes: usize, _page_size: usize) -> StackRestoreGuard {
                let layout = std::alloc::Layout::from_size_align(stack_bytes, 16).unwrap();
                let ptr = std::alloc::alloc(layout);
                assert!(!ptr.is_null(), "unable to allocate stack");
                StackRestoreGuard {
                    new_stack: ptr as *mut _,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                }
            }

            #[cfg(not(target_arch = "wasm32"))]
            unsafe fn new(stack_bytes: usize, page_size: usize) -> StackRestoreGuard {
                let new_stack = libc::mmap(
                    std::ptr::null_mut(),
                    stack_bytes,
                    libc::PROT_NONE,
                    libc::MAP_PRIVATE |
                    libc::MAP_ANON,
                    -1, // Some implementations assert fd = -1 if MAP_ANON is specified
                    0
                );
                if new_stack == libc::MAP_FAILED {
                    let error = std::io::Error::last_os_error();
                    panic!("allocating stack failed with: {}", error)
                }
                let guard = StackRestoreGuard {
                    new_stack,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                };
                let above_guard_page = new_stack.add(page_size);
                #[cfg(not(target_os = "openbsd"))]
                let result = libc::mprotect(
                    above_guard_page,
                    stack_bytes - page_size,
                    libc::PROT_READ | libc::PROT_WRITE
                );
                #[cfg(target_os = "openbsd")]
                let result = if libc::mmap(
                        above_guard_page,
                        stack_bytes - page_size,
                        libc::PROT_READ | libc::PROT_WRITE,
                        libc::MAP_FIXED | libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_STACK,
                        -1,
                        0) == above_guard_page {
                    0
                } else {
                    -1
                };
                if result == -1 {
                    let error = std::io::Error::last_os_error();
                    drop(guard);
                    panic!("setting stack permissions failed with: {}", error)
                }
                guard
            }
        }

        impl Drop for StackRestoreGuard {
            fn drop(&mut self) {
                #[cfg(target_arch = "wasm32")]
                unsafe {
                    std::alloc::dealloc(
                        self.new_stack as *mut u8,
                        std::alloc::Layout::from_size_align_unchecked(self.stack_bytes, 16),
                    );
                }
                #[cfg(not(target_arch = "wasm32"))]
                unsafe {
                    // FIXME: check the error code and decide what to do with it.
                    // Perhaps a debug_assertion?
                    libc::munmap(self.new_stack, self.stack_bytes);
                }
                set_stack_limit(self.old_stack_limit);
            }
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Calculate a number of pages we want to allocate for the new stack.
            // For maximum portability we want to produce a stack that is aligned to a page and has
            // a size that’s a multiple of page size. Furthermore we want to allocate two extra pages
            // for the stack guard. To achieve that we do our calculations in number of pages and
            // convert to bytes last.
            let page_size = page_size();
            let requested_pages = stack_size
                .checked_add(page_size - 1)
                .expect("unreasonably large stack requested") / page_size;
            let stack_pages = std::cmp::max(1, requested_pages) + 2;
            let stack_bytes = stack_pages.checked_mul(page_size)
                .expect("unreasonably large stack requested");

            // Next, there are a couple of approaches to how we allocate the new stack. We take the
            // most obvious path and use `mmap`. We also `mprotect` a guard page into our
            // allocation.
            //
            // We use a guard pattern to ensure we deallocate the allocated stack when we leave
            // this function and also try to uphold various safety invariants required by `psm`
            // (such as not unwinding from the callback we pass to it).
            //
            // Other than that this code has no meaningful gotchas.
            unsafe {
                let guard = StackRestoreGuard::new(stack_bytes, page_size);
                let above_guard_page = guard.new_stack.add(page_size);
                set_stack_limit(Some(above_guard_page as usize));
                let panic = psm::on_stack(above_guard_page as *mut _, stack_size, move || {
                    std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback)).err()
                });
                drop(guard);
                if let Some(p) = panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        fn page_size() -> usize {
            // FIXME: consider caching the page size.
            #[cfg(not(target_arch = "wasm32"))]
            unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) as usize }
            #[cfg(target_arch = "wasm32")]
            { 65536 }
        }
    }

    no {
        #[cfg(not(windows))]
        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            drop(stack_size);
            callback();
        }
    }
}

cfg_if! {
    if #[cfg(windows)] {
        use std::ptr;
        use std::io;

        use winapi::shared::basetsd::*;
        use winapi::shared::minwindef::{LPVOID, BOOL};
        use winapi::shared::ntdef::*;
        use winapi::um::fibersapi::*;
        use winapi::um::memoryapi::*;
        use winapi::um::processthreadsapi::*;
        use winapi::um::winbase::*;

        // Make sure the libstacker.a (implemented in C) is linked.
        // See https://github.com/rust-lang/rust/issues/65610
        #[link(name = "stacker")]
        extern {
            fn __stacker_get_current_fiber() -> PVOID;
        }

        struct FiberInfo<F> {
            callback: std::mem::MaybeUninit<F>,
            panic: Option<Box<dyn std::any::Any + Send + 'static>>,
            parent_fiber: LPVOID,
        }

        unsafe extern "system" fn fiber_proc<F: FnOnce()>(data: LPVOID) {
            // This function is the entry point to our inner fiber, and as an argument we get an
            // instance of `FiberInfo`. We will set up the "runtime" for the callback and execute
            // it.
            let data = &mut *(data as *mut FiberInfo<F>);
            let old_stack_limit = get_stack_limit();
            set_stack_limit(guess_os_stack_limit());
            let callback = data.callback.as_ptr();
            data.panic = std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback.read())).err();

            // Restore to the previous Fiber
            set_stack_limit(old_stack_limit);
            SwitchToFiber(data.parent_fiber);
            return;
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Fibers (or stackful coroutines) are the only official way to create new stacks on the
            // same thread on Windows. So in order to extend the stack we create a fiber and switch
            // to it so we can use its stack. After running `callback` within our fiber, we switch
            // back to the current stack and destroy the fiber and its associated stack.
            unsafe {
                let was_fiber = IsThreadAFiber() == TRUE as BOOL;
                let mut data = FiberInfo {
                    callback: std::mem::MaybeUninit::new(callback),
                    panic: None,
                    parent_fiber: {
                        if was_fiber {
                            // Get a handle to the current fiber. We need to use a C implementation
                            // for this, as GetCurrentFiber is a header-only function.
                            __stacker_get_current_fiber()
                        } else {
                            // Convert the current thread to a fiber, so we are able to switch back
                            // to the current stack. Threads converted to fibers still act like
                            // regular threads, but they have associated fiber data. We later
                            // convert it back to a regular thread and free the fiber data.
                            ConvertThreadToFiber(ptr::null_mut())
                        }
                    },
                };

                if data.parent_fiber.is_null() {
                    panic!("unable to convert thread to fiber: {}", io::Error::last_os_error());
                }

                let fiber = CreateFiber(
                    stack_size as SIZE_T,
                    Some(fiber_proc::<&mut dyn FnMut()>),
                    &mut data as *mut FiberInfo<&mut dyn FnMut()> as *mut _,
                );
                if fiber.is_null() {
                    panic!("unable to allocate fiber: {}", io::Error::last_os_error());
                }

                // Switch to the fiber we created. This changes stacks and starts executing
                // fiber_proc on it. fiber_proc will run `callback` and then switch back to run the
                // next statement.
                SwitchToFiber(fiber);
                DeleteFiber(fiber);

                // Clean-up.
                if !was_fiber {
                    if ConvertFiberToThread() == 0 {
                        // FIXME: Perhaps should not panic here?
                        panic!("unable to convert back to thread: {}", io::Error::last_os_error());
                    }
                }
                if let Some(p) = data.panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        #[inline(always)]
        fn get_thread_stack_guarantee() -> usize {
            let min_guarantee = if cfg!(target_pointer_width = "32") {
                0x1000
            } else {
                0x2000
            };
            let mut stack_guarantee = 0;
            unsafe {
                // Read the current thread stack guarantee
                // This is the stack reserved for stack overflow
                // exception handling.
                // This doesn't return the true value so we need
                // some further logic to calculate the real stack
                // guarantee. This logic is what is used on x86-32 and
                // x86-64 Windows 10. Other versions and platforms may differ
                SetThreadStackGuarantee(&mut stack_guarantee)
            };
            std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000
        }

        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            // Query the allocation which contains our stack pointer in order
            // to discover the size of the stack
            //
            // FIXME: we could read stack base from the TIB, specifically the 3rd element of it.
            type QueryT = winapi::um::winnt::MEMORY_BASIC_INFORMATION;
            let mut mi = std::mem::MaybeUninit::<QueryT>::uninit();
            VirtualQuery(
                psm::stack_pointer() as *const _,
                mi.as_mut_ptr(),
                std::mem::size_of::<QueryT>() as SIZE_T,
            );
            Some(mi.assume_init().AllocationBase as usize + get_thread_stack_guarantee() + 0x1000)
        }
    } else if #[cfg(any(target_os = "linux", target_os = "solaris", target_os = "netbsd"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_getattr_np(libc::pthread_self(),
                                                attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_attr_get_np(libc::pthread_self(), attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(target_os = "openbsd")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut stackinfo = std::mem::MaybeUninit::<libc::stack_t>::uninit();
            assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), stackinfo.as_mut_ptr()), 0);
            Some(stackinfo.assume_init().ss_sp as usize - stackinfo.assume_init().ss_size)
        }
    } else if #[cfg(target_os = "macos")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            Some(libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize -
                libc::pthread_get_stacksize_np(libc::pthread_self()) as usize)
        }
    } else {
        // The fallback for other platforms is to always increase the stack if we're on
        // the root stack. After we have increased the stack once, we know the new stack
        // size and don't need this pessimization anymore.
        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            None
        }
    }
}
460 | |