//! linux_raw syscalls supporting `rustix::thread`.
//!
//! # Safety
//!
//! See the `rustix::backend` module documentation for details.
6 | #![allow (unsafe_code, clippy::undocumented_unsafe_blocks)] |
7 | |
8 | use crate::backend::c; |
9 | use crate::backend::conv::{ |
10 | by_mut, by_ref, c_int, c_uint, ret, ret_c_int, ret_c_int_infallible, ret_usize, slice, |
11 | slice_just_addr, slice_just_addr_mut, zero, |
12 | }; |
13 | use crate::fd::BorrowedFd; |
14 | use crate::io; |
15 | use crate::pid::Pid; |
16 | use crate::thread::{futex, ClockId, NanosleepRelativeResult, Timespec}; |
17 | use core::mem::MaybeUninit; |
18 | use core::sync::atomic::AtomicU32; |
19 | use linux_raw_sys::general::{__kernel_timespec, TIMER_ABSTIME}; |
20 | #[cfg (target_pointer_width = "32" )] |
21 | use {crate::utils::option_as_ptr, linux_raw_sys::general::timespec as __kernel_old_timespec}; |
22 | |
23 | #[inline ] |
24 | pub(crate) fn clock_nanosleep_relative( |
25 | id: ClockId, |
26 | req: &__kernel_timespec, |
27 | ) -> NanosleepRelativeResult { |
28 | #[cfg (target_pointer_width = "32" )] |
29 | unsafe { |
30 | let mut rem = MaybeUninit::<__kernel_timespec>::uninit(); |
31 | match ret(syscall!( |
32 | __NR_clock_nanosleep_time64, |
33 | id, |
34 | c_int(0), |
35 | by_ref(req), |
36 | &mut rem |
37 | )) |
38 | .or_else(|err| { |
39 | // See the comments in `rustix_clock_gettime_via_syscall` about |
40 | // emulation. |
41 | if err == io::Errno::NOSYS { |
42 | clock_nanosleep_relative_old(id, req, &mut rem) |
43 | } else { |
44 | Err(err) |
45 | } |
46 | }) { |
47 | Ok(()) => NanosleepRelativeResult::Ok, |
48 | Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()), |
49 | Err(err) => NanosleepRelativeResult::Err(err), |
50 | } |
51 | } |
52 | #[cfg (target_pointer_width = "64" )] |
53 | unsafe { |
54 | let mut rem = MaybeUninit::<__kernel_timespec>::uninit(); |
55 | match ret(syscall!( |
56 | __NR_clock_nanosleep, |
57 | id, |
58 | c_int(0), |
59 | by_ref(req), |
60 | &mut rem |
61 | )) { |
62 | Ok(()) => NanosleepRelativeResult::Ok, |
63 | Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()), |
64 | Err(err) => NanosleepRelativeResult::Err(err), |
65 | } |
66 | } |
67 | } |
68 | |
/// Fallback for `clock_nanosleep_relative` that uses the legacy
/// `clock_nanosleep` syscall with the old `timespec` layout.
///
/// `req` is narrowed into the old layout; a value that does not fit yields
/// `EINVAL`. The syscall result is returned unchanged. `rem` is written if
/// and only if the syscall fails with `EINTR`, which is the only case in
/// which the kernel fills in the remaining time and the only case in which
/// the caller reads `rem`.
///
/// # Safety
///
/// This performs a raw syscall; see the `rustix::backend` module
/// documentation for details.
#[cfg(target_pointer_width = "32")]
unsafe fn clock_nanosleep_relative_old(
    id: ClockId,
    req: &__kernel_timespec,
    rem: &mut MaybeUninit<__kernel_timespec>,
) -> io::Result<()> {
    let old_req = __kernel_old_timespec {
        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
    };
    let mut old_rem = MaybeUninit::<__kernel_old_timespec>::uninit();
    let res = ret(syscall!(
        __NR_clock_nanosleep,
        id,
        c_int(0),
        by_ref(&old_req),
        &mut old_rem
    ));

    // Propagating the error with `?` before converting would leave `rem`
    // uninitialized on `EINTR`, and converting on success would read an
    // `old_rem` the kernel never wrote. Convert exactly in the `EINTR` case.
    if let Err(io::Errno::INTR) = res {
        let old_rem = old_rem.assume_init();
        rem.write(__kernel_timespec {
            tv_sec: old_rem.tv_sec.into(),
            tv_nsec: old_rem.tv_nsec.into(),
        });
    }
    res
}
94 | |
95 | #[inline ] |
96 | pub(crate) fn clock_nanosleep_absolute(id: ClockId, req: &__kernel_timespec) -> io::Result<()> { |
97 | #[cfg (target_pointer_width = "32" )] |
98 | unsafe { |
99 | ret(syscall_readonly!( |
100 | __NR_clock_nanosleep_time64, |
101 | id, |
102 | c_uint(TIMER_ABSTIME), |
103 | by_ref(req), |
104 | zero() |
105 | )) |
106 | .or_else(|err| { |
107 | // See the comments in `rustix_clock_gettime_via_syscall` about |
108 | // emulation. |
109 | if err == io::Errno::NOSYS { |
110 | clock_nanosleep_absolute_old(id, req) |
111 | } else { |
112 | Err(err) |
113 | } |
114 | }) |
115 | } |
116 | #[cfg (target_pointer_width = "64" )] |
117 | unsafe { |
118 | ret(syscall_readonly!( |
119 | __NR_clock_nanosleep, |
120 | id, |
121 | c_uint(TIMER_ABSTIME), |
122 | by_ref(req), |
123 | zero() |
124 | )) |
125 | } |
126 | } |
127 | |
/// Fallback for `clock_nanosleep_absolute` that uses the legacy
/// `clock_nanosleep` syscall with the old `timespec` layout.
///
/// `req` is narrowed into the old layout; a value that does not fit yields
/// `EINVAL`.
///
/// # Safety
///
/// This performs a raw syscall; see the `rustix::backend` module
/// documentation for details.
#[cfg(target_pointer_width = "32")]
unsafe fn clock_nanosleep_absolute_old(id: ClockId, req: &__kernel_timespec) -> io::Result<()> {
    let old_req = __kernel_old_timespec {
        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
    };
    ret(syscall_readonly!(
        __NR_clock_nanosleep,
        id,
        // The request is an absolute deadline, so the fallback must pass
        // `TIMER_ABSTIME` just like the time64 path does; flags of 0 would
        // make the kernel treat the deadline as a relative duration.
        c_uint(TIMER_ABSTIME),
        by_ref(&old_req),
        zero()
    ))
}
142 | |
143 | #[inline ] |
144 | pub(crate) fn nanosleep(req: &__kernel_timespec) -> NanosleepRelativeResult { |
145 | #[cfg (target_pointer_width = "32" )] |
146 | unsafe { |
147 | let mut rem = MaybeUninit::<__kernel_timespec>::uninit(); |
148 | match ret(syscall!( |
149 | __NR_clock_nanosleep_time64, |
150 | ClockId::Realtime, |
151 | c_int(0), |
152 | by_ref(req), |
153 | &mut rem |
154 | )) |
155 | .or_else(|err| { |
156 | // See the comments in `rustix_clock_gettime_via_syscall` about |
157 | // emulation. |
158 | if err == io::Errno::NOSYS { |
159 | nanosleep_old(req, &mut rem) |
160 | } else { |
161 | Err(err) |
162 | } |
163 | }) { |
164 | Ok(()) => NanosleepRelativeResult::Ok, |
165 | Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()), |
166 | Err(err) => NanosleepRelativeResult::Err(err), |
167 | } |
168 | } |
169 | #[cfg (target_pointer_width = "64" )] |
170 | unsafe { |
171 | let mut rem = MaybeUninit::<__kernel_timespec>::uninit(); |
172 | match ret(syscall!(__NR_nanosleep, by_ref(req), &mut rem)) { |
173 | Ok(()) => NanosleepRelativeResult::Ok, |
174 | Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()), |
175 | Err(err) => NanosleepRelativeResult::Err(err), |
176 | } |
177 | } |
178 | } |
179 | |
/// Fallback for `nanosleep` that uses the legacy `nanosleep` syscall with
/// the old `timespec` layout.
///
/// `req` is narrowed into the old layout; a value that does not fit yields
/// `EINVAL`. The syscall result is returned unchanged. `rem` is written if
/// and only if the syscall fails with `EINTR`, which is the only case in
/// which the kernel fills in the remaining time and the only case in which
/// the caller reads `rem`.
///
/// # Safety
///
/// This performs a raw syscall; see the `rustix::backend` module
/// documentation for details.
#[cfg(target_pointer_width = "32")]
unsafe fn nanosleep_old(
    req: &__kernel_timespec,
    rem: &mut MaybeUninit<__kernel_timespec>,
) -> io::Result<()> {
    let old_req = __kernel_old_timespec {
        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
    };
    let mut old_rem = MaybeUninit::<__kernel_old_timespec>::uninit();
    let res = ret(syscall!(__NR_nanosleep, by_ref(&old_req), &mut old_rem));

    // Propagating the error with `?` before converting would leave `rem`
    // uninitialized on `EINTR`, and converting on success would read an
    // `old_rem` the kernel never wrote. Convert exactly in the `EINTR` case.
    if let Err(io::Errno::INTR) = res {
        let old_rem = old_rem.assume_init();
        rem.write(__kernel_timespec {
            tv_sec: old_rem.tv_sec.into(),
            tv_nsec: old_rem.tv_nsec.into(),
        });
    }
    res
}
198 | |
199 | #[inline ] |
200 | pub(crate) fn gettid() -> Pid { |
201 | unsafe { |
202 | let tid: i32 = ret_c_int_infallible(raw:syscall_readonly!(__NR_gettid)); |
203 | Pid::from_raw_unchecked(raw:tid) |
204 | } |
205 | } |
206 | |
207 | /// # Safety |
208 | /// |
209 | /// The raw pointers must point to valid aligned memory. |
210 | #[inline ] |
211 | pub(crate) unsafe fn futex_val2( |
212 | uaddr: *const AtomicU32, |
213 | op: super::futex::Operation, |
214 | flags: futex::Flags, |
215 | val: u32, |
216 | val2: u32, |
217 | uaddr2: *const AtomicU32, |
218 | val3: u32, |
219 | ) -> io::Result<usize> { |
220 | // Pass `val2` in the least-significant bytes of the `timeout` argument. |
221 | // [“the kernel casts the timeout value first to unsigned long, then to |
222 | // uint32_t”], so we perform that exact conversion in reverse to create |
223 | // the pointer. |
224 | // |
225 | // [“the kernel casts the timeout value first to unsigned long, then to uint32_t”]: https://man7.org/linux/man-pages/man2/futex.2.html |
226 | let timeout = val2 as usize as *const Timespec; |
227 | |
228 | #[cfg (target_pointer_width = "32" )] |
229 | { |
230 | ret_usize(syscall!( |
231 | __NR_futex_time64, |
232 | uaddr, |
233 | (op, flags), |
234 | c_uint(val), |
235 | timeout, |
236 | uaddr2, |
237 | c_uint(val3) |
238 | )) |
239 | } |
240 | #[cfg (target_pointer_width = "64" )] |
241 | ret_usize(syscall!( |
242 | __NR_futex, |
243 | uaddr, |
244 | (op, flags), |
245 | c_uint(val), |
246 | timeout, |
247 | uaddr2, |
248 | c_uint(val3) |
249 | )) |
250 | } |
251 | |
252 | /// # Safety |
253 | /// |
254 | /// The raw pointers must point to valid aligned memory. |
255 | #[inline ] |
256 | pub(crate) unsafe fn futex_timeout( |
257 | uaddr: *const AtomicU32, |
258 | op: super::futex::Operation, |
259 | flags: futex::Flags, |
260 | val: u32, |
261 | timeout: *const Timespec, |
262 | uaddr2: *const AtomicU32, |
263 | val3: u32, |
264 | ) -> io::Result<usize> { |
265 | #[cfg (target_pointer_width = "32" )] |
266 | { |
267 | ret_usize(syscall!( |
268 | __NR_futex_time64, |
269 | uaddr, |
270 | (op, flags), |
271 | c_uint(val), |
272 | timeout, |
273 | uaddr2, |
274 | c_uint(val3) |
275 | )) |
276 | .or_else(|err| { |
277 | // See the comments in `rustix_clock_gettime_via_syscall` about |
278 | // emulation. |
279 | if err == io::Errno::NOSYS { |
280 | futex_old_timespec(uaddr, op, flags, val, timeout, uaddr2, val3) |
281 | } else { |
282 | Err(err) |
283 | } |
284 | }) |
285 | } |
286 | #[cfg (target_pointer_width = "64" )] |
287 | ret_usize(syscall!( |
288 | __NR_futex, |
289 | uaddr, |
290 | (op, flags), |
291 | c_uint(val), |
292 | timeout, |
293 | uaddr2, |
294 | c_uint(val3) |
295 | )) |
296 | } |
297 | |
/// Invokes the legacy `futex` syscall, converting a non-null `timeout` into
/// the old `timespec` layout first.
///
/// A null `timeout` is forwarded as a null pointer. A timeout whose fields
/// do not fit the old layout yields `EINVAL` without making the syscall.
///
/// # Safety
///
/// The raw pointers must point to valid aligned memory.
#[cfg(target_pointer_width = "32")]
unsafe fn futex_old_timespec(
    uaddr: *const AtomicU32,
    op: super::futex::Operation,
    flags: futex::Flags,
    val: u32,
    timeout: *const Timespec,
    uaddr2: *const AtomicU32,
    val3: u32,
) -> io::Result<usize> {
    // `as_ref` yields `None` for a null pointer and a borrow otherwise.
    let legacy_timeout = match timeout.as_ref() {
        None => None,
        Some(ts) => {
            let tv_sec = ts.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?;
            let tv_nsec = ts.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?;
            Some(__kernel_old_timespec { tv_sec, tv_nsec })
        }
    };
    ret_usize(syscall!(
        __NR_futex,
        uaddr,
        (op, flags),
        c_uint(val),
        option_as_ptr(legacy_timeout.as_ref()),
        uaddr2,
        c_uint(val3)
    ))
}
332 | #[inline ] |
333 | pub(crate) fn setns(fd: BorrowedFd<'_>, nstype: c::c_int) -> io::Result<c::c_int> { |
334 | unsafe { ret_c_int(raw:syscall_readonly!(__NR_setns, fd, c_int(nstype))) } |
335 | } |
336 | |
337 | #[inline ] |
338 | pub(crate) fn unshare(flags: crate::thread::UnshareFlags) -> io::Result<()> { |
339 | unsafe { ret(raw:syscall_readonly!(__NR_unshare, flags)) } |
340 | } |
341 | |
342 | #[inline ] |
343 | pub(crate) fn capget( |
344 | header: &mut linux_raw_sys::general::__user_cap_header_struct, |
345 | data: &mut [MaybeUninit<linux_raw_sys::general::__user_cap_data_struct>], |
346 | ) -> io::Result<()> { |
347 | unsafe { |
348 | ret(raw:syscall!( |
349 | __NR_capget, |
350 | by_mut(header), |
351 | slice_just_addr_mut(data) |
352 | )) |
353 | } |
354 | } |
355 | |
356 | #[inline ] |
357 | pub(crate) fn capset( |
358 | header: &mut linux_raw_sys::general::__user_cap_header_struct, |
359 | data: &[linux_raw_sys::general::__user_cap_data_struct], |
360 | ) -> io::Result<()> { |
361 | unsafe { ret(raw:syscall!(__NR_capset, by_mut(header), slice_just_addr(data))) } |
362 | } |
363 | |
364 | #[inline ] |
365 | pub(crate) fn setuid_thread(uid: crate::ugid::Uid) -> io::Result<()> { |
366 | unsafe { ret(raw:syscall_readonly!(__NR_setuid, uid)) } |
367 | } |
368 | |
369 | #[inline ] |
370 | pub(crate) fn setresuid_thread( |
371 | ruid: crate::ugid::Uid, |
372 | euid: crate::ugid::Uid, |
373 | suid: crate::ugid::Uid, |
374 | ) -> io::Result<()> { |
375 | #[cfg (any(target_arch = "x86" , target_arch = "arm" , target_arch = "sparc" ))] |
376 | unsafe { |
377 | ret(syscall_readonly!(__NR_setresuid32, ruid, euid, suid)) |
378 | } |
379 | #[cfg (not(any(target_arch = "x86" , target_arch = "arm" , target_arch = "sparc" )))] |
380 | unsafe { |
381 | ret(raw:syscall_readonly!(__NR_setresuid, ruid, euid, suid)) |
382 | } |
383 | } |
384 | |
385 | #[inline ] |
386 | pub(crate) fn setgid_thread(gid: crate::ugid::Gid) -> io::Result<()> { |
387 | unsafe { ret(raw:syscall_readonly!(__NR_setgid, gid)) } |
388 | } |
389 | |
390 | #[inline ] |
391 | pub(crate) fn setresgid_thread( |
392 | rgid: crate::ugid::Gid, |
393 | egid: crate::ugid::Gid, |
394 | sgid: crate::ugid::Gid, |
395 | ) -> io::Result<()> { |
396 | #[cfg (any(target_arch = "x86" , target_arch = "arm" , target_arch = "sparc" ))] |
397 | unsafe { |
398 | ret(syscall_readonly!(__NR_setresgid32, rgid, egid, sgid)) |
399 | } |
400 | #[cfg (not(any(target_arch = "x86" , target_arch = "arm" , target_arch = "sparc" )))] |
401 | unsafe { |
402 | ret(raw:syscall_readonly!(__NR_setresgid, rgid, egid, sgid)) |
403 | } |
404 | } |
405 | |
406 | #[inline ] |
407 | pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> { |
408 | let (addr: ArgReg<'_, A1>, len: ArgReg<'_, A0>) = slice(gids); |
409 | unsafe { ret(raw:syscall_readonly!(__NR_setgroups, len, addr)) } |
410 | } |
411 | |