1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2020 Intel Corporation. |
4 | ** |
5 | ** Permission is hereby granted, free of charge, to any person obtaining a copy |
6 | ** of this software and associated documentation files (the "Software"), to deal |
7 | ** in the Software without restriction, including without limitation the rights |
8 | ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
9 | ** copies of the Software, and to permit persons to whom the Software is |
10 | ** furnished to do so, subject to the following conditions: |
11 | ** |
12 | ** The above copyright notice and this permission notice shall be included in |
13 | ** all copies or substantial portions of the Software. |
14 | ** |
15 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
18 | ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
20 | ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
21 | ** THE SOFTWARE. |
22 | ** |
23 | ****************************************************************************/ |
24 | |
/* Request the GNU extensions of the C library. This is defined ahead of every
 * #include in this file; presumably it is what makes the syscall()
 * declaration used further down visible -- confirm against the C library's
 * feature-test macro rules. The #ifndef guard avoids redefining the macro
 * when it was already set (for example on the compiler command line). */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
28 | |
29 | #include "forkfd.h" |
30 | |
31 | #include <errno.h> |
32 | #include <fcntl.h> |
33 | #include <limits.h> |
34 | #include <sched.h> |
35 | #include <signal.h> |
36 | #include <stdio.h> |
37 | #include <stdlib.h> |
38 | #include <string.h> |
39 | #include <sys/resource.h> |
40 | #include <sys/syscall.h> |
41 | #include <sys/types.h> |
42 | #include <sys/wait.h> |
43 | #include <unistd.h> |
44 | |
45 | #include "forkfd_atomic.h" |
46 | |
/* Fallback values for system headers that do not define these constants.
 * CLONE_PIDFD is the clone flag used by system_forkfd(); P_PIDFD is the
 * waitid id type this file uses when waiting on a pidfd. */
#ifndef CLONE_PIDFD
# define CLONE_PIDFD 0x00001000
#endif
#ifndef P_PIDFD
# define P_PIDFD 3
#endif
53 | |
/* Helpers implemented in forkfd.c. They are declared static here, so this
 * file is presumably #included into forkfd.c rather than compiled as its own
 * translation unit -- confirm against the build setup. */
static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions);
static void convertStatusToForkfdInfo(int status, struct forkfd_info *info);

/* Cached result of the kernel feature probe:
 *   0  -> not probed yet (initial value),
 *  >0  -> the pidfd-based implementation is usable,
 *  <0  -> it is not usable. */
static ffd_atomic_int system_forkfd_state = FFD_ATOMIC_INIT(0);
59 | |
/*
 * Thin wrapper around the raw waitid system call.
 *
 * The call goes through syscall() instead of the C library's waitid()
 * because the raw system call accepts one more argument -- ru, a pointer to
 * a struct rusage -- than glibc offers.
 *
 * Returns the value produced by syscall(), converted to int.
 */
static int sys_waitid(int which, int pid_or_pidfd, siginfo_t *infop, int options,
                      struct rusage *ru)
{
    long result = syscall(__NR_waitid, which, pid_or_pidfd, infop, options, ru);
    return (int) result;
}
67 | |
/*
 * Issues the raw clone system call with the given flags.
 *
 * The stack pointer, the child-tid pointer and the TLS value are always
 * passed as NULL / 0; only the flags and the parent-tid pointer come from
 * the caller. The order in which the kernel expects those arguments depends
 * on the architecture, hence the preprocessor ladder below.
 *
 * cloneflags: flags handed to the system call.
 * ptid:       forwarded in the parent-tid argument slot.
 *
 * Returns the value produced by syscall(), converted to int. On an
 * architecture not listed here, sets errno to ENOSYS and returns -1.
 */
static int sys_clone(unsigned long cloneflags, int *ptid)
{
    void *const stack = NULL;
    int *const child_tid = NULL;
    const unsigned long tls = 0;

#if defined(__NR_clone2)
    const size_t stack_len = 0;
    return (int) syscall(__NR_clone2, cloneflags, stack, stack_len, ptid, child_tid, tls);
#elif defined(__cris__) || defined(__s390__)
    /* CONFIG_CLONE_BACKWARDS2 architectures: the stack comes before the flags */
    return (int) syscall(__NR_clone, stack, cloneflags, ptid, tls, child_tid);
#elif defined(__microblaze__)
    /* CONFIG_CLONE_BACKWARDS3 architectures: an extra stack size argument */
    const size_t stack_len = 0;
    return (int) syscall(__NR_clone, cloneflags, stack, stack_len, ptid, tls, child_tid);
#elif defined(__arc__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
    defined(__nds32__) || defined(__hppa__) || defined(__powerpc__) || defined(__i386__) || \
    defined(__x86_64__) || defined(__xtensa__) || defined(__alpha__) || defined(__riscv)
    /* CONFIG_CLONE_BACKWARDS architectures swap the child-tid and TLS
     * arguments, but both are zero here, so the order does not matter. */
    return (int) syscall(__NR_clone, cloneflags, stack, ptid, child_tid, tls);
#else
    /* Unknown calling convention: refuse rather than guess. The casts only
     * silence unused-variable warnings. */
    (void) stack;
    (void) child_tid;
    (void) tls;
    errno = ENOSYS;
    return -1;
#endif
}
97 | |
98 | static int detect_clone_pidfd_support() |
99 | { |
100 | /* |
101 | * Detect support for CLONE_PIDFD and P_PIDFD. Support was added in steps: |
102 | * - Linux 5.2 added CLONE_PIDFD support in clone(2) system call |
103 | * - Linux 5.2 added pidfd_send_signal(2) |
104 | * - Linux 5.3 added support for poll(2) on pidfds |
105 | * - Linux 5.3 added clone3(2) |
106 | * - Linux 5.4 added P_PIDFD support in waitid(2) |
107 | * |
108 | * We need CLONE_PIDFD and the poll(2) support. We could emulate the |
109 | * P_PIDFD support by reading the PID from /proc/self/fdinfo/n, which works |
110 | * in Linux 5.2, but without poll(2), we can't guarantee the functionality |
111 | * anyway. |
112 | * |
113 | * So we detect by trying to waitid(2) on a positive file descriptor that |
114 | * is definitely closed (INT_MAX). If P_PIDFD is supported, waitid(2) will |
115 | * return EBADF. If it isn't supported, it returns EINVAL (as it would for |
116 | * a negative file descriptor). This will succeed on Linux 5.4. |
117 | * |
118 | * We could have instead detected by the existence of the clone3(2) system |
119 | * call, but for that we would have needed to wait for __NR_clone3 to show |
120 | * up on the libcs. We choose to go via the waitid(2) route, which requires |
121 | * platform-independent constants only. It would have simplified the |
122 | * sys_clone() mess above... |
123 | */ |
124 | |
125 | sys_waitid(P_PIDFD, INT_MAX, NULL, WEXITED|WNOHANG, NULL); |
126 | return errno == EBADF ? 1 : -1; |
127 | } |
128 | |
129 | int system_has_forkfd() |
130 | { |
131 | return ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED) > 0; |
132 | } |
133 | |
134 | int system_forkfd(int flags, pid_t *ppid, int *system) |
135 | { |
136 | pid_t pid; |
137 | int pidfd; |
138 | |
139 | int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED); |
140 | if (state == 0) { |
141 | state = detect_clone_pidfd_support(); |
142 | ffd_atomic_store(&system_forkfd_state, state, FFD_ATOMIC_RELAXED); |
143 | } |
144 | if (state < 0) { |
145 | *system = 0; |
146 | return state; |
147 | } |
148 | |
149 | *system = 1; |
150 | unsigned long cloneflags = CLONE_PIDFD | SIGCHLD; |
151 | pid = sys_clone(cloneflags, ptid: &pidfd); |
152 | if (ppid) |
153 | *ppid = pid; |
154 | |
155 | if (pid == 0) { |
156 | /* Child process */ |
157 | return FFD_CHILD_PROCESS; |
158 | } |
159 | |
160 | /* parent process */ |
161 | if ((flags & FFD_CLOEXEC) == 0) { |
162 | /* pidfd defaults to O_CLOEXEC */ |
163 | fcntl(fd: pidfd, F_SETFD, 0); |
164 | } |
165 | if (flags & FFD_NONBLOCK) |
166 | fcntl(fd: pidfd, F_SETFL, fcntl(fd: pidfd, F_GETFL) | O_NONBLOCK); |
167 | return pidfd; |
168 | } |
169 | |
170 | int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdoptions, struct rusage *rusage) |
171 | { |
172 | siginfo_t si; |
173 | int ret; |
174 | int options = convertForkfdWaitFlagsToWaitFlags(ffdoptions); |
175 | |
176 | if ((options & WNOHANG) == 0) { |
177 | /* check if the file descriptor is non-blocking */ |
178 | ret = fcntl(fd: ffd, F_GETFL); |
179 | if (ret == -1) |
180 | return ret; |
181 | if (ret & O_NONBLOCK) |
182 | options |= WNOHANG; |
183 | } |
184 | |
185 | ret = sys_waitid(P_PIDFD, pid_or_pidfd: ffd, infop: &si, options, ru: rusage); |
186 | if (ret == -1 && errno == ECHILD) { |
187 | errno = EWOULDBLOCK; |
188 | } else if (ret == 0 && info) { |
189 | info->code = si.si_code; |
190 | info->status = si.si_status; |
191 | } |
192 | return ret; |
193 | } |
194 | |