1 | use crate::cmp; |
2 | use crate::ffi::CStr; |
3 | use crate::io; |
4 | use crate::mem; |
5 | use crate::num::NonZeroUsize; |
6 | use crate::ptr; |
7 | use crate::sys::{os, stack_overflow}; |
8 | use crate::time::Duration; |
9 | |
10 | #[cfg (all(target_os = "linux" , target_env = "gnu" ))] |
11 | use crate::sys::weak::dlsym; |
12 | #[cfg (any(target_os = "solaris" , target_os = "illumos" , target_os = "nto" ))] |
13 | use crate::sys::weak::weak; |
// `DEFAULT_MIN_STACK_SIZE` is chosen per target: exactly one of the four definitions below is
// compiled in. The values are presumably byte counts; the consumer is not visible in this part
// of the file.

/// Value used on every target other than L4Re, VxWorks and ESP-IDF: `2 * 1024 * 1024`.
#[cfg(not(any(target_os = "l4re", target_os = "vxworks", target_os = "espidf")))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
/// Value used on L4Re: `1024 * 1024`.
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
/// Value used on VxWorks: `256 * 1024`.
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
/// Value used on ESP-IDF: `0`, a sentinel meaning "use the platform-configured stack size".
#[cfg(target_os = "espidf")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF menuconfig system should be used
22 | |
/// Hand-written declarations of the Zircon items that `Thread::set_name` uses on Fuchsia.
#[cfg(target_os = "fuchsia")]
mod zircon {
    // The aliases stay private to this module; only the constant and the two functions are
    // exported to the parent module.
    type zx_handle_t = u32;
    type zx_status_t = i32;
    /// Property identifier passed to `zx_object_set_property` when naming the current thread.
    pub const ZX_PROP_NAME: u32 = 3;

    extern "C" {
        /// Sets `property` on the object behind `handle` from the `value_size` bytes at `value`
        /// (semantics inferred from the signature and the call site; confirm against the
        /// Zircon documentation).
        pub fn zx_object_set_property(
            handle: zx_handle_t,
            property: u32,
            value: *const libc::c_void,
            value_size: libc::size_t,
        ) -> zx_status_t;
        /// Returns a handle, presumably for the calling thread (used as the target of
        /// `zx_object_set_property` in `set_name`).
        pub fn zx_thread_self() -> zx_handle_t;
    }
}
39 | |
/// Handle to a native thread, identified by its `pthread_t`.
///
/// Dropping the handle detaches the thread (see the `Drop` impl); `join` and `into_id`
/// consume the handle without detaching.
pub struct Thread {
    /// Identifier filled in by `pthread_create` in `Thread::new`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
48 | |
49 | impl Thread { |
50 | // unsafe: see thread::Builder::spawn_unchecked for safety requirements |
51 | pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>) -> io::Result<Thread> { |
52 | let p = Box::into_raw(Box::new(p)); |
53 | let mut native: libc::pthread_t = mem::zeroed(); |
54 | let mut attr: libc::pthread_attr_t = mem::zeroed(); |
55 | assert_eq!(libc::pthread_attr_init(&mut attr), 0); |
56 | |
57 | #[cfg (target_os = "espidf" )] |
58 | if stack > 0 { |
59 | // Only set the stack if a non-zero value is passed |
60 | // 0 is used as an indication that the default stack size configured in the ESP-IDF menuconfig system should be used |
61 | assert_eq!( |
62 | libc::pthread_attr_setstacksize(&mut attr, cmp::max(stack, min_stack_size(&attr))), |
63 | 0 |
64 | ); |
65 | } |
66 | |
67 | #[cfg (not(target_os = "espidf" ))] |
68 | { |
69 | let stack_size = cmp::max(stack, min_stack_size(&attr)); |
70 | |
71 | match libc::pthread_attr_setstacksize(&mut attr, stack_size) { |
72 | 0 => {} |
73 | n => { |
74 | assert_eq!(n, libc::EINVAL); |
75 | // EINVAL means |stack_size| is either too small or not a |
76 | // multiple of the system page size. Because it's definitely |
77 | // >= PTHREAD_STACK_MIN, it must be an alignment issue. |
78 | // Round up to the nearest page and try again. |
79 | let page_size = os::page_size(); |
80 | let stack_size = |
81 | (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1); |
82 | assert_eq!(libc::pthread_attr_setstacksize(&mut attr, stack_size), 0); |
83 | } |
84 | }; |
85 | } |
86 | |
87 | let ret = libc::pthread_create(&mut native, &attr, thread_start, p as *mut _); |
88 | // Note: if the thread creation fails and this assert fails, then p will |
89 | // be leaked. However, an alternative design could cause double-free |
90 | // which is clearly worse. |
91 | assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); |
92 | |
93 | return if ret != 0 { |
94 | // The thread failed to start and as a result p was not consumed. Therefore, it is |
95 | // safe to reconstruct the box so that it gets deallocated. |
96 | drop(Box::from_raw(p)); |
97 | Err(io::Error::from_raw_os_error(ret)) |
98 | } else { |
99 | Ok(Thread { id: native }) |
100 | }; |
101 | |
102 | extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { |
103 | unsafe { |
104 | // Next, set up our stack overflow handler which may get triggered if we run |
105 | // out of stack. |
106 | let _handler = stack_overflow::Handler::new(); |
107 | // Finally, let's run some code. |
108 | Box::from_raw(main as *mut Box<dyn FnOnce()>)(); |
109 | } |
110 | ptr::null_mut() |
111 | } |
112 | } |
113 | |
114 | pub fn yield_now() { |
115 | let ret = unsafe { libc::sched_yield() }; |
116 | debug_assert_eq!(ret, 0); |
117 | } |
118 | |
/// Names the calling thread through `prctl(PR_SET_NAME, …)` (Android).
///
/// The return value of `prctl` is ignored.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing arguments are passed as zero.
    const UNUSED: libc::c_ulong = 0;

    unsafe {
        libc::prctl(PR_SET_NAME, name.as_ptr(), UNUSED, UNUSED, UNUSED);
    }
}
132 | |
133 | #[cfg (target_os = "linux" )] |
134 | pub fn set_name(name: &CStr) { |
135 | const TASK_COMM_LEN: usize = 16; |
136 | |
137 | unsafe { |
138 | // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20. |
139 | let name = truncate_cstr::<{ TASK_COMM_LEN }>(name); |
140 | let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr()); |
141 | // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked. |
142 | debug_assert_eq!(res, 0); |
143 | } |
144 | } |
145 | |
/// Names the calling thread through `pthread_set_name_np` (FreeBSD, DragonFly, OpenBSD).
///
/// Nothing is checked after the call; the name is passed through unmodified.
#[cfg(any(target_os = "freebsd", target_os = "dragonfly", target_os = "openbsd"))]
pub fn set_name(name: &CStr) {
    unsafe {
        let this_thread = libc::pthread_self();
        libc::pthread_set_name_np(this_thread, name.as_ptr());
    }
}
152 | |
/// Names the calling thread through the single-argument `pthread_setname_np` (Apple targets).
///
/// The name is first truncated to fit a `MAXTHREADNAMESIZE`-byte buffer, NUL included.
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "tvos", target_os = "watchos"))]
pub fn set_name(name: &CStr) {
    let truncated = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
    let res = unsafe { libc::pthread_setname_np(truncated.as_ptr()) };
    // We have no good way of propagating errors here, but in debug-builds let's check that
    // this actually worked.
    debug_assert_eq!(res, 0);
}
162 | |
/// Names the calling thread through NetBSD's three-argument `pthread_setname_np`.
///
/// The name is supplied as the argument of a `"%s"` format string. Success is only checked
/// in debug builds.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    let res = unsafe {
        let this_thread = libc::pthread_self();
        let format = c"%s".as_ptr();
        libc::pthread_setname_np(this_thread, format, name.as_ptr() as *mut libc::c_void)
    };
    debug_assert_eq!(res, 0);
}
174 | |
/// Names the calling thread on Solaris, illumos and QNX Neutrino.
///
/// `pthread_setname_np` is looked up as a weak symbol; when it is not available the call is
/// a no-op. On QNX Neutrino the name is truncated to `_NTO_THREAD_NAME_MAX` bytes (NUL
/// included) first.
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
pub fn set_name(name: &CStr) {
    weak! {
        fn pthread_setname_np(
            libc::pthread_t, *const libc::c_char
        ) -> libc::c_int
    }

    // Nothing to do when the symbol is missing.
    let Some(setname) = pthread_setname_np.get() else { return };

    #[cfg(target_os = "nto")]
    let name = truncate_cstr::<{ libc::_NTO_THREAD_NAME_MAX as usize }>(name);

    let res = unsafe { setname(libc::pthread_self(), name.as_ptr()) };
    debug_assert_eq!(res, 0);
}
191 | |
/// Names the calling thread on Fuchsia by setting `ZX_PROP_NAME` on the current thread handle.
///
/// The length passed excludes the trailing NUL; the returned status is ignored.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use self::zircon::{zx_object_set_property, zx_thread_self, ZX_PROP_NAME};

    let name_ptr = name.as_ptr() as *const libc::c_void;
    let name_len = name.to_bytes().len();
    unsafe {
        zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, name_ptr, name_len);
    }
}
204 | |
/// Names the calling thread on Haiku via `find_thread` + `rename_thread`.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    let res = unsafe {
        // A null argument is used to look up the calling thread.
        let this_thread = libc::find_thread(ptr::null_mut());
        libc::rename_thread(this_thread, name.as_ptr())
    };
    // We have no good way of propagating errors here, but in debug-builds let's check that
    // this actually worked.
    debug_assert_eq!(res, libc::B_OK);
}
214 | |
/// No-op `set_name` for the targets selected by the `cfg` below: the requested name is
/// ignored.
#[cfg(any(
    target_env = "newlib",
    target_os = "l4re",
    target_os = "emscripten",
    target_os = "redox",
    target_os = "vxworks",
    target_os = "hurd",
    target_os = "aix",
))]
pub fn set_name(_name: &CStr) {
    // Newlib, Emscripten, and VxWorks have no way to set a thread name.
    // NOTE(review): the cfg list above also covers L4Re, Redox, Hurd and AIX, which this
    // comment does not mention; on those targets the name is likewise dropped here.
}
227 | |
228 | #[cfg (not(target_os = "espidf" ))] |
229 | pub fn sleep(dur: Duration) { |
230 | let mut secs = dur.as_secs(); |
231 | let mut nsecs = dur.subsec_nanos() as _; |
232 | |
233 | // If we're awoken with a signal then the return value will be -1 and |
234 | // nanosleep will fill in `ts` with the remaining time. |
235 | unsafe { |
236 | while secs > 0 || nsecs > 0 { |
237 | let mut ts = libc::timespec { |
238 | tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t, |
239 | tv_nsec: nsecs, |
240 | }; |
241 | secs -= ts.tv_sec as u64; |
242 | let ts_ptr = &mut ts as *mut _; |
243 | if libc::nanosleep(ts_ptr, ts_ptr) == -1 { |
244 | assert_eq!(os::errno(), libc::EINTR); |
245 | secs += ts.tv_sec as u64; |
246 | nsecs = ts.tv_nsec; |
247 | } else { |
248 | nsecs = 0; |
249 | } |
250 | } |
251 | } |
252 | } |
253 | |
/// Blocks the calling thread for `dur` (at microsecond resolution) using `usleep` (ESP-IDF).
///
/// `usleep` takes a `u32`, so longer durations are slept in chunks of at most `u32::MAX`
/// microseconds. The return value of `usleep` is ignored.
#[cfg(target_os = "espidf")]
pub fn sleep(dur: Duration) {
    let mut micros_left = dur.as_micros();
    while micros_left > 0 {
        let step = cmp::min(micros_left, u32::MAX as u128) as u32;
        unsafe {
            libc::usleep(step);
        }
        micros_left -= step as u128;
    }
}
266 | |
267 | pub fn join(self) { |
268 | unsafe { |
269 | let ret = libc::pthread_join(self.id, ptr::null_mut()); |
270 | mem::forget(self); |
271 | assert!(ret == 0, "failed to join thread: {}" , io::Error::from_raw_os_error(ret)); |
272 | } |
273 | } |
274 | |
/// Returns the `pthread_t` stored in this handle without giving up ownership of it.
pub fn id(&self) -> libc::pthread_t {
    self.id
}
278 | |
279 | pub fn into_id(self) -> libc::pthread_t { |
280 | let id = self.id; |
281 | mem::forget(self); |
282 | id |
283 | } |
284 | } |
285 | |
286 | impl Drop for Thread { |
287 | fn drop(&mut self) { |
288 | let ret = unsafe { libc::pthread_detach(self.id) }; |
289 | debug_assert_eq!(ret, 0); |
290 | } |
291 | } |
292 | |
/// Copies `cstr` into a fixed-size, NUL-terminated buffer of `MAX_WITH_NUL` C characters.
///
/// At most `MAX_WITH_NUL - 1` bytes of `cstr` are copied. The buffer starts out zeroed and
/// its last element is never written, so the result always ends in a NUL; shorter names are
/// padded with NULs.
///
/// `MAX_WITH_NUL` must be at least 1.
#[cfg(any(
    target_os = "linux",
    target_os = "macos",
    target_os = "ios",
    target_os = "tvos",
    target_os = "watchos",
    target_os = "nto",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    // The element type is inferred from the return type. Annotating it as `i32` would not
    // match `libc::c_char`.
    let mut result = [0; MAX_WITH_NUL];
    // `zip` stops at the shorter side: either the name runs out or the last writable slot
    // (everything but the final NUL) has been filled.
    for (src, dst) in cstr.to_bytes().iter().zip(&mut result[..MAX_WITH_NUL - 1]) {
        *dst = *src as libc::c_char;
    }
    result
}
308 | |
309 | pub fn available_parallelism() -> io::Result<NonZeroUsize> { |
310 | cfg_if::cfg_if! { |
311 | if #[cfg(any( |
312 | target_os = "android" , |
313 | target_os = "emscripten" , |
314 | target_os = "fuchsia" , |
315 | target_os = "hurd" , |
316 | target_os = "ios" , |
317 | target_os = "tvos" , |
318 | target_os = "linux" , |
319 | target_os = "macos" , |
320 | target_os = "solaris" , |
321 | target_os = "illumos" , |
322 | target_os = "aix" , |
323 | ))] { |
324 | #[allow (unused_assignments)] |
325 | #[allow (unused_mut)] |
326 | let mut quota = usize::MAX; |
327 | |
328 | #[cfg (any(target_os = "android" , target_os = "linux" ))] |
329 | { |
330 | quota = cgroups::quota().max(1); |
331 | let mut set: libc::cpu_set_t = unsafe { mem::zeroed() }; |
332 | unsafe { |
333 | if libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) == 0 { |
334 | let count = libc::CPU_COUNT(&set) as usize; |
335 | let count = count.min(quota); |
336 | |
337 | // According to sched_getaffinity's API it should always be non-zero, but |
338 | // some old MIPS kernels were buggy and zero-initialized the mask if |
339 | // none was explicitly set. |
340 | // In that case we use the sysconf fallback. |
341 | if let Some(count) = NonZeroUsize::new(count) { |
342 | return Ok(count) |
343 | } |
344 | } |
345 | } |
346 | } |
347 | match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } { |
348 | -1 => Err(io::Error::last_os_error()), |
349 | 0 => Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform" )), |
350 | cpus => { |
351 | let count = cpus as usize; |
352 | // Cover the unusual situation where we were able to get the quota but not the affinity mask |
353 | let count = count.min(quota); |
354 | Ok(unsafe { NonZeroUsize::new_unchecked(count) }) |
355 | } |
356 | } |
357 | } else if #[cfg(any( |
358 | target_os = "freebsd" , |
359 | target_os = "dragonfly" , |
360 | target_os = "openbsd" , |
361 | target_os = "netbsd" , |
362 | ))] { |
363 | use crate::ptr; |
364 | |
365 | #[cfg (target_os = "freebsd" )] |
366 | { |
367 | let mut set: libc::cpuset_t = unsafe { mem::zeroed() }; |
368 | unsafe { |
369 | if libc::cpuset_getaffinity( |
370 | libc::CPU_LEVEL_WHICH, |
371 | libc::CPU_WHICH_PID, |
372 | -1, |
373 | mem::size_of::<libc::cpuset_t>(), |
374 | &mut set, |
375 | ) == 0 { |
376 | let count = libc::CPU_COUNT(&set) as usize; |
377 | if count > 0 { |
378 | return Ok(NonZeroUsize::new_unchecked(count)); |
379 | } |
380 | } |
381 | } |
382 | } |
383 | |
384 | #[cfg (target_os = "netbsd" )] |
385 | { |
386 | unsafe { |
387 | let set = libc::_cpuset_create(); |
388 | if !set.is_null() { |
389 | let mut count: usize = 0; |
390 | if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 { |
391 | for i in 0..u64::MAX { |
392 | match libc::_cpuset_isset(i, set) { |
393 | -1 => break, |
394 | 0 => continue, |
395 | _ => count = count + 1, |
396 | } |
397 | } |
398 | } |
399 | libc::_cpuset_destroy(set); |
400 | if let Some(count) = NonZeroUsize::new(count) { |
401 | return Ok(count); |
402 | } |
403 | } |
404 | } |
405 | } |
406 | |
407 | let mut cpus: libc::c_uint = 0; |
408 | let mut cpus_size = crate::mem::size_of_val(&cpus); |
409 | |
410 | unsafe { |
411 | cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint; |
412 | } |
413 | |
414 | // Fallback approach in case of errors or no hardware threads. |
415 | if cpus < 1 { |
416 | let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0]; |
417 | let res = unsafe { |
418 | libc::sysctl( |
419 | mib.as_mut_ptr(), |
420 | 2, |
421 | &mut cpus as *mut _ as *mut _, |
422 | &mut cpus_size as *mut _ as *mut _, |
423 | ptr::null_mut(), |
424 | 0, |
425 | ) |
426 | }; |
427 | |
428 | // Handle errors if any. |
429 | if res == -1 { |
430 | return Err(io::Error::last_os_error()); |
431 | } else if cpus == 0 { |
432 | return Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform" )); |
433 | } |
434 | } |
435 | |
436 | Ok(unsafe { NonZeroUsize::new_unchecked(cpus as usize) }) |
437 | } else if #[cfg(target_os = "nto" )] { |
438 | unsafe { |
439 | use libc::_syspage_ptr; |
440 | if _syspage_ptr.is_null() { |
441 | Err(io::const_io_error!(io::ErrorKind::NotFound, "No syspage available" )) |
442 | } else { |
443 | let cpus = (*_syspage_ptr).num_cpu; |
444 | NonZeroUsize::new(cpus as usize) |
445 | .ok_or(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform" )) |
446 | } |
447 | } |
448 | } else if #[cfg(target_os = "haiku" )] { |
449 | // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus` |
450 | // `get_system_info` calls then `smp_get_num_cpus` |
451 | unsafe { |
452 | let mut sinfo: libc::system_info = crate::mem::zeroed(); |
453 | let res = libc::get_system_info(&mut sinfo); |
454 | |
455 | if res != libc::B_OK { |
456 | return Err(io::const_io_error!(io::ErrorKind::NotFound, "The number of hardware threads is not known for the target platform" )); |
457 | } |
458 | |
459 | Ok(NonZeroUsize::new_unchecked(sinfo.cpu_count as usize)) |
460 | } |
461 | } else { |
462 | // FIXME: implement on vxWorks, Redox, l4re |
463 | Err(io::const_io_error!(io::ErrorKind::Unsupported, "Getting the number of hardware threads is not supported on the target platform" )) |
464 | } |
465 | } |
466 | } |
467 | |
468 | #[cfg (any(target_os = "android" , target_os = "linux" ))] |
469 | mod cgroups { |
470 | //! Currently not covered |
471 | //! * cgroup v2 in non-standard mountpoints |
472 | //! * paths containing control characters or spaces, since those would be escaped in procfs |
473 | //! output and we don't unescape |
474 | use crate::borrow::Cow; |
475 | use crate::ffi::OsString; |
476 | use crate::fs::{try_exists, File}; |
477 | use crate::io::Read; |
478 | use crate::io::{BufRead, BufReader}; |
479 | use crate::os::unix::ffi::OsStringExt; |
480 | use crate::path::Path; |
481 | use crate::path::PathBuf; |
482 | use crate::str::from_utf8; |
483 | |
/// Which cgroup hierarchy version a `/proc/self/cgroup` entry was classified as by `quota`.
#[derive(PartialEq)]
enum Cgroup {
    /// Entry whose controller list contains `cpu`.
    V1,
    /// Entry with an empty controller list.
    V2,
}
489 | |
490 | /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot |
491 | /// be determined or is not set. |
492 | pub(super) fn quota() -> usize { |
493 | let mut quota = usize::MAX; |
494 | if cfg!(miri) { |
495 | // Attempting to open a file fails under default flags due to isolation. |
496 | // And Miri does not have parallelism anyway. |
497 | return quota; |
498 | } |
499 | |
500 | let _: Option<()> = try { |
501 | let mut buf = Vec::with_capacity(128); |
502 | // find our place in the cgroup hierarchy |
503 | File::open("/proc/self/cgroup" ).ok()?.read_to_end(&mut buf).ok()?; |
504 | let (cgroup_path, version) = |
505 | buf.split(|&c| c == b' \n' ).fold(None, |previous, line| { |
506 | let mut fields = line.splitn(3, |&c| c == b':' ); |
507 | // 2nd field is a list of controllers for v1 or empty for v2 |
508 | let version = match fields.nth(1) { |
509 | Some(b"" ) => Cgroup::V2, |
510 | Some(controllers) |
511 | if from_utf8(controllers) |
512 | .is_ok_and(|c| c.split(',' ).any(|c| c == "cpu" )) => |
513 | { |
514 | Cgroup::V1 |
515 | } |
516 | _ => return previous, |
517 | }; |
518 | |
519 | // already-found v1 trumps v2 since it explicitly specifies its controllers |
520 | if previous.is_some() && version == Cgroup::V2 { |
521 | return previous; |
522 | } |
523 | |
524 | let path = fields.last()?; |
525 | // skip leading slash |
526 | Some((path[1..].to_owned(), version)) |
527 | })?; |
528 | let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path)); |
529 | |
530 | quota = match version { |
531 | Cgroup::V1 => quota_v1(cgroup_path), |
532 | Cgroup::V2 => quota_v2(cgroup_path), |
533 | }; |
534 | }; |
535 | |
536 | quota |
537 | } |
538 | |
539 | fn quota_v2(group_path: PathBuf) -> usize { |
540 | let mut quota = usize::MAX; |
541 | |
542 | let mut path = PathBuf::with_capacity(128); |
543 | let mut read_buf = String::with_capacity(20); |
544 | |
545 | // standard mount location defined in file-hierarchy(7) manpage |
546 | let cgroup_mount = "/sys/fs/cgroup" ; |
547 | |
548 | path.push(cgroup_mount); |
549 | path.push(&group_path); |
550 | |
551 | path.push("cgroup.controllers" ); |
552 | |
553 | // skip if we're not looking at cgroup2 |
554 | if matches!(try_exists(&path), Err(_) | Ok(false)) { |
555 | return usize::MAX; |
556 | }; |
557 | |
558 | path.pop(); |
559 | |
560 | let _: Option<()> = try { |
561 | while path.starts_with(cgroup_mount) { |
562 | path.push("cpu.max" ); |
563 | |
564 | read_buf.clear(); |
565 | |
566 | if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() { |
567 | let raw_quota = read_buf.lines().next()?; |
568 | let mut raw_quota = raw_quota.split(' ' ); |
569 | let limit = raw_quota.next()?; |
570 | let period = raw_quota.next()?; |
571 | match (limit.parse::<usize>(), period.parse::<usize>()) { |
572 | (Ok(limit), Ok(period)) if period > 0 => { |
573 | quota = quota.min(limit / period); |
574 | } |
575 | _ => {} |
576 | } |
577 | } |
578 | |
579 | path.pop(); // pop filename |
580 | path.pop(); // pop dir |
581 | } |
582 | }; |
583 | |
584 | quota |
585 | } |
586 | |
587 | fn quota_v1(group_path: PathBuf) -> usize { |
588 | let mut quota = usize::MAX; |
589 | let mut path = PathBuf::with_capacity(128); |
590 | let mut read_buf = String::with_capacity(20); |
591 | |
592 | // Hardcode commonly used locations mentioned in the cgroups(7) manpage |
593 | // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts |
594 | let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[ |
595 | |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu" ), p)), |
596 | |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct" ), p)), |
597 | // this can be expensive on systems with tons of mountpoints |
598 | // but we only get to this point when /proc/self/cgroups explicitly indicated |
599 | // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work |
600 | find_mountpoint, |
601 | ]; |
602 | |
603 | for mount in mounts { |
604 | let Some((mount, group_path)) = mount(&group_path) else { continue }; |
605 | |
606 | path.clear(); |
607 | path.push(mount.as_ref()); |
608 | path.push(&group_path); |
609 | |
610 | // skip if we guessed the mount incorrectly |
611 | if matches!(try_exists(&path), Err(_) | Ok(false)) { |
612 | continue; |
613 | } |
614 | |
615 | while path.starts_with(mount.as_ref()) { |
616 | let mut parse_file = |name| { |
617 | path.push(name); |
618 | read_buf.clear(); |
619 | |
620 | let f = File::open(&path); |
621 | path.pop(); // restore buffer before any early returns |
622 | f.ok()?.read_to_string(&mut read_buf).ok()?; |
623 | let parsed = read_buf.trim().parse::<usize>().ok()?; |
624 | |
625 | Some(parsed) |
626 | }; |
627 | |
628 | let limit = parse_file("cpu.cfs_quota_us" ); |
629 | let period = parse_file("cpu.cfs_period_us" ); |
630 | |
631 | match (limit, period) { |
632 | (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period), |
633 | _ => {} |
634 | } |
635 | |
636 | path.pop(); |
637 | } |
638 | |
639 | // we passed the try_exists above so we should have traversed the correct hierarchy |
640 | // when reaching this line |
641 | break; |
642 | } |
643 | |
644 | quota |
645 | } |
646 | |
/// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
///
/// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
/// over the already-included prefix
///
/// Returns the mount point together with the adjusted group path, or `None` when
/// `/proc/self/mountinfo` cannot be read, a line is malformed, or no entry matches.
fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
    let mut reader = BufReader::new(File::open("/proc/self/mountinfo").ok()?);
    let mut raw_line = String::with_capacity(256);
    loop {
        raw_line.clear();
        if reader.read_line(&mut raw_line).ok()? == 0 {
            // End of file without a match.
            return None;
        }

        let mut items = raw_line.trim().split(' ');

        // The iterator is consumed from both ends, so the order of these calls matters.
        let sub_path = items.nth(3)?;
        let mount_point = items.next()?;
        let mount_opts = items.next_back()?;
        let filesystem_type = items.nth_back(1)?;

        let is_cpu_cgroup =
            filesystem_type == "cgroup" && mount_opts.split(',').any(|opt| opt == "cpu");
        if !is_cpu_cgroup {
            // not a cgroup / not a cpu-controller
            continue;
        }

        let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;

        // When stripping fails this is a bind-mount and the bound subdirectory does not
        // contain the cgroup this process belongs to.
        let Ok(trimmed_group_path) = group_path.strip_prefix(sub_path) else { continue };

        return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
    }
}
688 | } |
689 | |
/// Stub `guard` module for targets outside the list handled by the other `guard` module: no
/// guard page range is ever reported.
#[cfg(all(
    not(target_os = "linux"),
    not(target_os = "freebsd"),
    not(target_os = "hurd"),
    not(target_os = "macos"),
    not(target_os = "netbsd"),
    not(target_os = "openbsd"),
    not(target_os = "solaris")
))]
#[cfg_attr(test, allow(dead_code))]
pub mod guard {
    use crate::ops::Range;
    /// Address range of a guard page, as `start..end`.
    pub type Guard = Range<usize>;
    /// Always `None` on these targets.
    pub unsafe fn current() -> Option<Guard> {
        None
    }
    /// Always `None` on these targets; nothing is set up.
    pub unsafe fn init() -> Option<Guard> {
        None
    }
}
710 | |
711 | #[cfg (any( |
712 | target_os = "linux" , |
713 | target_os = "freebsd" , |
714 | target_os = "hurd" , |
715 | target_os = "macos" , |
716 | target_os = "netbsd" , |
717 | target_os = "openbsd" , |
718 | target_os = "solaris" |
719 | ))] |
720 | #[cfg_attr (test, allow(dead_code))] |
721 | pub mod guard { |
722 | #[cfg (not(all(target_os = "linux" , target_env = "gnu" )))] |
723 | use libc::{mmap as mmap64, mprotect}; |
724 | #[cfg (all(target_os = "linux" , target_env = "gnu" ))] |
725 | use libc::{mmap64, mprotect}; |
726 | use libc::{MAP_ANON, MAP_FAILED, MAP_FIXED, MAP_PRIVATE, PROT_NONE, PROT_READ, PROT_WRITE}; |
727 | |
728 | use crate::io; |
729 | use crate::ops::Range; |
730 | use crate::sync::atomic::{AtomicUsize, Ordering}; |
731 | use crate::sys::os; |
732 | |
// This is initialized in init() and only read from after
// (0 until then; `get_stack_start_aligned` asserts that it is non-zero).
static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);

/// Address range of a guard page, as `start..end`.
pub type Guard = Range<usize>;
737 | |
/// Returns `ss_sp` as reported by `stack_getbounds` for the current thread (Solaris).
///
/// Panics if `stack_getbounds` fails.
#[cfg(target_os = "solaris")]
unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
    let mut bounds: libc::stack_t = crate::mem::zeroed();
    let rc = libc::stack_getbounds(&mut bounds);
    assert_eq!(rc, 0);
    Some(bounds.ss_sp)
}
744 | |
/// Returns the low end of the current thread's stack (macOS).
///
/// Computed as the address from `pthread_get_stackaddr_np` minus the size from
/// `pthread_get_stacksize_np`.
#[cfg(target_os = "macos")]
unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
    let this_thread = libc::pthread_self();
    let reported_addr = libc::pthread_get_stackaddr_np(this_thread);
    let stack_size = libc::pthread_get_stacksize_np(this_thread);
    Some(reported_addr.map_addr(|addr| addr - stack_size))
}
751 | |
/// Returns the low end of the current thread's stack (OpenBSD).
///
/// Computed as `ss_sp - ss_size` from `pthread_stackseg_np`; for the main thread one page
/// (`PAGE_SIZE`) is added on top of that. Panics if `pthread_stackseg_np` fails.
#[cfg(target_os = "openbsd")]
unsafe fn get_stack_start() -> Option<*mut libc::c_void> {
    let mut segment: libc::stack_t = crate::mem::zeroed();
    assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), &mut segment), 0);

    let stack_ptr = segment.ss_sp;
    let is_main_thread = libc::pthread_main_np() == 1;
    let stackaddr = if is_main_thread {
        // main thread
        stack_ptr.addr() - segment.ss_size + PAGE_SIZE.load(Ordering::Relaxed)
    } else {
        // new thread
        stack_ptr.addr() - segment.ss_size
    };
    Some(stack_ptr.with_addr(stackaddr))
}
767 | |
768 | #[cfg (any( |
769 | target_os = "android" , |
770 | target_os = "freebsd" , |
771 | target_os = "hurd" , |
772 | target_os = "linux" , |
773 | target_os = "netbsd" , |
774 | target_os = "l4re" |
775 | ))] |
776 | unsafe fn get_stack_start() -> Option<*mut libc::c_void> { |
777 | let mut ret = None; |
778 | let mut attr: libc::pthread_attr_t = crate::mem::zeroed(); |
779 | #[cfg (target_os = "freebsd" )] |
780 | assert_eq!(libc::pthread_attr_init(&mut attr), 0); |
781 | #[cfg (target_os = "freebsd" )] |
782 | let e = libc::pthread_attr_get_np(libc::pthread_self(), &mut attr); |
783 | #[cfg (not(target_os = "freebsd" ))] |
784 | let e = libc::pthread_getattr_np(libc::pthread_self(), &mut attr); |
785 | if e == 0 { |
786 | let mut stackaddr = crate::ptr::null_mut(); |
787 | let mut stacksize = 0; |
788 | assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackaddr, &mut stacksize), 0); |
789 | ret = Some(stackaddr); |
790 | } |
791 | if e == 0 || cfg!(target_os = "freebsd" ) { |
792 | assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); |
793 | } |
794 | ret |
795 | } |
796 | |
797 | // Precondition: PAGE_SIZE is initialized. |
798 | unsafe fn get_stack_start_aligned() -> Option<*mut libc::c_void> { |
799 | let page_size = PAGE_SIZE.load(Ordering::Relaxed); |
800 | assert!(page_size != 0); |
801 | let stackptr = get_stack_start()?; |
802 | let stackaddr = stackptr.addr(); |
803 | |
804 | // Ensure stackaddr is page aligned! A parent process might |
805 | // have reset RLIMIT_STACK to be non-page aligned. The |
806 | // pthread_attr_getstack() reports the usable stack area |
807 | // stackaddr < stackaddr + stacksize, so if stackaddr is not |
808 | // page-aligned, calculate the fix such that stackaddr < |
809 | // new_page_aligned_stackaddr < stackaddr + stacksize |
810 | let remainder = stackaddr % page_size; |
811 | Some(if remainder == 0 { |
812 | stackptr |
813 | } else { |
814 | stackptr.with_addr(stackaddr + page_size - remainder) |
815 | }) |
816 | } |
817 | |
818 | pub unsafe fn init() -> Option<Guard> { |
819 | let page_size = os::page_size(); |
820 | PAGE_SIZE.store(page_size, Ordering::Relaxed); |
821 | |
822 | if cfg!(all(target_os = "linux" , not(target_env = "musl" ))) { |
823 | // Linux doesn't allocate the whole stack right away, and |
824 | // the kernel has its own stack-guard mechanism to fault |
825 | // when growing too close to an existing mapping. If we map |
826 | // our own guard, then the kernel starts enforcing a rather |
827 | // large gap above that, rendering much of the possible |
828 | // stack space useless. See #43052. |
829 | // |
830 | // Instead, we'll just note where we expect rlimit to start |
831 | // faulting, so our handler can report "stack overflow", and |
832 | // trust that the kernel's own stack guard will work. |
833 | let stackptr = get_stack_start_aligned()?; |
834 | let stackaddr = stackptr.addr(); |
835 | Some(stackaddr - page_size..stackaddr) |
836 | } else if cfg!(all(target_os = "linux" , target_env = "musl" )) { |
837 | // For the main thread, the musl's pthread_attr_getstack |
838 | // returns the current stack size, rather than maximum size |
839 | // it can eventually grow to. It cannot be used to determine |
840 | // the position of kernel's stack guard. |
841 | None |
842 | } else if cfg!(target_os = "freebsd" ) { |
843 | // FreeBSD's stack autogrows, and optionally includes a guard page |
844 | // at the bottom. If we try to remap the bottom of the stack |
845 | // ourselves, FreeBSD's guard page moves upwards. So we'll just use |
846 | // the builtin guard page. |
847 | let stackptr = get_stack_start_aligned()?; |
848 | let guardaddr = stackptr.addr(); |
849 | // Technically the number of guard pages is tunable and controlled |
850 | // by the security.bsd.stack_guard_page sysctl, but there are |
851 | // few reasons to change it from the default. The default value has |
852 | // been 1 ever since FreeBSD 11.1 and 10.4. |
853 | const GUARD_PAGES: usize = 1; |
854 | let guard = guardaddr..guardaddr + GUARD_PAGES * page_size; |
855 | Some(guard) |
856 | } else if cfg!(target_os = "openbsd" ) { |
857 | // OpenBSD stack already includes a guard page, and stack is |
858 | // immutable. |
859 | // |
860 | // We'll just note where we expect rlimit to start |
861 | // faulting, so our handler can report "stack overflow", and |
862 | // trust that the kernel's own stack guard will work. |
863 | let stackptr = get_stack_start_aligned()?; |
864 | let stackaddr = stackptr.addr(); |
865 | Some(stackaddr - page_size..stackaddr) |
866 | } else { |
867 | // Reallocate the last page of the stack. |
868 | // This ensures SIGBUS will be raised on |
869 | // stack overflow. |
870 | // Systems which enforce strict PAX MPROTECT do not allow |
871 | // to mprotect() a mapping with less restrictive permissions |
872 | // than the initial mmap() used, so we mmap() here with |
873 | // read/write permissions and only then mprotect() it to |
874 | // no permissions at all. See issue #50313. |
875 | let stackptr = get_stack_start_aligned()?; |
876 | let result = mmap64( |
877 | stackptr, |
878 | page_size, |
879 | PROT_READ | PROT_WRITE, |
880 | MAP_PRIVATE | MAP_ANON | MAP_FIXED, |
881 | -1, |
882 | 0, |
883 | ); |
884 | if result != stackptr || result == MAP_FAILED { |
885 | panic!("failed to allocate a guard page: {}" , io::Error::last_os_error()); |
886 | } |
887 | |
888 | let result = mprotect(stackptr, page_size, PROT_NONE); |
889 | if result != 0 { |
890 | panic!("failed to protect the guard page: {}" , io::Error::last_os_error()); |
891 | } |
892 | |
893 | let guardaddr = stackptr.addr(); |
894 | |
895 | Some(guardaddr..guardaddr + page_size) |
896 | } |
897 | } |
898 | |
/// Returns the guard range of the current thread (macOS, OpenBSD, Solaris).
///
/// The range is the single page (`PAGE_SIZE` bytes) that ends at `get_stack_start()`.
/// Returns `None` when the stack start is unknown.
#[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "solaris"))]
pub unsafe fn current() -> Option<Guard> {
    let stackaddr = get_stack_start()?.addr();
    let page_size = PAGE_SIZE.load(Ordering::Relaxed);
    Some(stackaddr - page_size..stackaddr)
}
905 | |
906 | #[cfg (any( |
907 | target_os = "android" , |
908 | target_os = "freebsd" , |
909 | target_os = "hurd" , |
910 | target_os = "linux" , |
911 | target_os = "netbsd" , |
912 | target_os = "l4re" |
913 | ))] |
914 | pub unsafe fn current() -> Option<Guard> { |
915 | let mut ret = None; |
916 | let mut attr: libc::pthread_attr_t = crate::mem::zeroed(); |
917 | #[cfg (target_os = "freebsd" )] |
918 | assert_eq!(libc::pthread_attr_init(&mut attr), 0); |
919 | #[cfg (target_os = "freebsd" )] |
920 | let e = libc::pthread_attr_get_np(libc::pthread_self(), &mut attr); |
921 | #[cfg (not(target_os = "freebsd" ))] |
922 | let e = libc::pthread_getattr_np(libc::pthread_self(), &mut attr); |
923 | if e == 0 { |
924 | let mut guardsize = 0; |
925 | assert_eq!(libc::pthread_attr_getguardsize(&attr, &mut guardsize), 0); |
926 | if guardsize == 0 { |
927 | if cfg!(all(target_os = "linux" , target_env = "musl" )) { |
928 | // musl versions before 1.1.19 always reported guard |
929 | // size obtained from pthread_attr_get_np as zero. |
930 | // Use page size as a fallback. |
931 | guardsize = PAGE_SIZE.load(Ordering::Relaxed); |
932 | } else { |
933 | panic!("there is no guard page" ); |
934 | } |
935 | } |
936 | let mut stackptr = crate::ptr::null_mut::<libc::c_void>(); |
937 | let mut size = 0; |
938 | assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackptr, &mut size), 0); |
939 | |
940 | let stackaddr = stackptr.addr(); |
941 | ret = if cfg!(any(target_os = "freebsd" , target_os = "netbsd" , target_os = "hurd" )) { |
942 | Some(stackaddr - guardsize..stackaddr) |
943 | } else if cfg!(all(target_os = "linux" , target_env = "musl" )) { |
944 | Some(stackaddr - guardsize..stackaddr) |
945 | } else if cfg!(all(target_os = "linux" , any(target_env = "gnu" , target_env = "uclibc" ))) |
946 | { |
947 | // glibc used to include the guard area within the stack, as noted in the BUGS |
948 | // section of `man pthread_attr_getguardsize`. This has been corrected starting |
949 | // with glibc 2.27, and in some distro backports, so the guard is now placed at the |
950 | // end (below) the stack. There's no easy way for us to know which we have at |
951 | // runtime, so we'll just match any fault in the range right above or below the |
952 | // stack base to call that fault a stack overflow. |
953 | Some(stackaddr - guardsize..stackaddr + guardsize) |
954 | } else { |
955 | Some(stackaddr..stackaddr + guardsize) |
956 | }; |
957 | } |
958 | if e == 0 || cfg!(target_os = "freebsd" ) { |
959 | assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); |
960 | } |
961 | ret |
962 | } |
963 | } |
964 | |
965 | // glibc >= 2.15 has a __pthread_get_minstack() function that returns |
966 | // PTHREAD_STACK_MIN plus bytes needed for thread-local storage. |
967 | // We need that information to avoid blowing up when a small stack |
968 | // is created in an application with big thread-local storage requirements. |
969 | // See #6233 for rationale and details. |
970 | #[cfg (all(target_os = "linux" , target_env = "gnu" ))] |
971 | fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize { |
972 | // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628) |
973 | // We shouldn't really be using such an internal symbol, but there's currently |
974 | // no other way to account for the TLS size. |
975 | dlsym!(fn __pthread_get_minstack(*const libc::pthread_attr_t) -> libc::size_t); |
976 | |
977 | match __pthread_get_minstack.get() { |
978 | None => libc::PTHREAD_STACK_MIN, |
979 | Some(f) => unsafe { f(attr) }, |
980 | } |
981 | } |
982 | |
// Outside of glibc there is no __pthread_get_minstack() worth looking up,
// so the plain PTHREAD_STACK_MIN constant is returned and the attribute
// pointer is ignored. NetBSD has a separate definition.
#[cfg(not(any(all(target_os = "linux", target_env = "gnu"), target_os = "netbsd")))]
fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}
988 | |
// NetBSD variant: returns a fixed value and ignores the attribute pointer.
#[cfg(target_os = "netbsd")]
fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    // Just a guess.
    const GUESSED_MIN_STACK: usize = 2048;
    GUESSED_MIN_STACK
}
993 | |