1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * Based on Christian Brauner's clone3() example. |
5 | * These tests assume that they are running in the host's |
6 | * PID namespace. |
7 | */ |
8 | |
9 | #define _GNU_SOURCE |
10 | #include <errno.h> |
11 | #include <linux/types.h> |
12 | #include <linux/sched.h> |
13 | #include <stdio.h> |
14 | #include <stdlib.h> |
15 | #include <stdbool.h> |
16 | #include <sys/syscall.h> |
17 | #include <sys/types.h> |
18 | #include <sys/un.h> |
19 | #include <sys/wait.h> |
20 | #include <unistd.h> |
21 | #include <sched.h> |
22 | |
23 | #include "../kselftest.h" |
24 | #include "clone3_selftests.h" |
25 | |
/*
 * Nesting-depth constant: sizes the set_tid array in main() and is the
 * base for the set_tid_size values used by the invalid-size tests.
 */
#define MAX_PID_NS_LEVEL 32

/* Child -> parent: the clone3()'d child writes one byte when it is ready. */
static int pipe_1[2];
/* Parent -> child: main() writes one byte to let the waiting child finish. */
static int pipe_2[2];
30 | |
/*
 * Terminate the calling (child) process with exit code @ret.
 *
 * _exit() does not flush stdio buffers, so stdout and stderr are flushed
 * explicitly first to keep the child's buffered output from being lost.
 */
static void child_exit(int ret)
{
	FILE *const pending[] = { stdout, stderr };
	size_t idx;

	for (idx = 0; idx < sizeof(pending) / sizeof(pending[0]); idx++)
		fflush(pending[idx]);

	_exit(ret);
}
37 | |
38 | static int call_clone3_set_tid(pid_t *set_tid, |
39 | size_t set_tid_size, |
40 | int flags, |
41 | int expected_pid, |
42 | bool wait_for_it) |
43 | { |
44 | int status; |
45 | pid_t pid = -1; |
46 | |
47 | struct __clone_args args = { |
48 | .flags = flags, |
49 | .exit_signal = SIGCHLD, |
50 | .set_tid = ptr_to_u64(set_tid), |
51 | .set_tid_size = set_tid_size, |
52 | }; |
53 | |
54 | pid = sys_clone3(args: &args, size: sizeof(args)); |
55 | if (pid < 0) { |
56 | ksft_print_msg(msg: "%s - Failed to create new process\n" , |
57 | strerror(errno)); |
58 | return -errno; |
59 | } |
60 | |
61 | if (pid == 0) { |
62 | int ret; |
63 | char tmp = 0; |
64 | int exit_code = EXIT_SUCCESS; |
65 | |
66 | ksft_print_msg(msg: "I am the child, my PID is %d (expected %d)\n" , |
67 | getpid(), set_tid[0]); |
68 | if (wait_for_it) { |
69 | ksft_print_msg(msg: "[%d] Child is ready and waiting\n" , |
70 | getpid()); |
71 | |
72 | /* Signal the parent that the child is ready */ |
73 | close(pipe_1[0]); |
74 | ret = write(pipe_1[1], &tmp, 1); |
75 | if (ret != 1) { |
76 | ksft_print_msg( |
77 | msg: "Writing to pipe returned %d" , ret); |
78 | exit_code = EXIT_FAILURE; |
79 | } |
80 | close(pipe_1[1]); |
81 | close(pipe_2[1]); |
82 | ret = read(pipe_2[0], &tmp, 1); |
83 | if (ret != 1) { |
84 | ksft_print_msg( |
85 | msg: "Reading from pipe returned %d" , ret); |
86 | exit_code = EXIT_FAILURE; |
87 | } |
88 | close(pipe_2[0]); |
89 | } |
90 | |
91 | if (set_tid[0] != getpid()) |
92 | child_exit(ret: EXIT_FAILURE); |
93 | child_exit(ret: exit_code); |
94 | } |
95 | |
96 | if (expected_pid == 0 || expected_pid == pid) { |
97 | ksft_print_msg(msg: "I am the parent (%d). My child's pid is %d\n" , |
98 | getpid(), pid); |
99 | } else { |
100 | ksft_print_msg( |
101 | msg: "Expected child pid %d does not match actual pid %d\n" , |
102 | expected_pid, pid); |
103 | return -1; |
104 | } |
105 | |
106 | if (waitpid(pid, &status, 0) < 0) { |
107 | ksft_print_msg(msg: "Child returned %s\n" , strerror(errno)); |
108 | return -errno; |
109 | } |
110 | |
111 | if (!WIFEXITED(status)) |
112 | return -1; |
113 | |
114 | return WEXITSTATUS(status); |
115 | } |
116 | |
/*
 * Run one clone3()/set_tid test case and record it with the kselftest
 * framework.
 *
 * The arguments other than @expected are forwarded unchanged to
 * call_clone3_set_tid(); its return value is compared against @expected
 * and the case is reported as a pass or a fail accordingly.
 */
static void test_clone3_set_tid(pid_t *set_tid,
				size_t set_tid_size,
				int flags,
				int expected,
				int expected_pid,
				bool wait_for_it)
{
	/* Only the parent ever returns from call_clone3_set_tid(). */
	const pid_t self = getpid();
	int outcome;

	ksft_print_msg(
		"[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
		self, set_tid[0], flags);

	outcome = call_clone3_set_tid(set_tid, set_tid_size, flags,
				      expected_pid, wait_for_it);

	ksft_print_msg(
		"[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
		self, set_tid[0], outcome, expected);

	if (outcome == expected) {
		ksft_test_result_pass(
			"[%d] Result (%d) matches expectation (%d)\n",
			self, outcome, expected);
		return;
	}

	ksft_test_result_fail(
		"[%d] Result (%d) is different than expected (%d)\n",
		self, outcome, expected);
}
143 | int main(int argc, char *argv[]) |
144 | { |
145 | FILE *f; |
146 | char buf; |
147 | char *line; |
148 | int status; |
149 | int ret = -1; |
150 | size_t len = 0; |
151 | int pid_max = 0; |
152 | uid_t uid = getuid(); |
153 | char proc_path[100] = {0}; |
154 | pid_t pid, ns1, ns2, ns3, ns_pid; |
155 | pid_t set_tid[MAX_PID_NS_LEVEL * 2]; |
156 | |
157 | ksft_print_header(); |
158 | ksft_set_plan(plan: 29); |
159 | test_clone3_supported(); |
160 | |
161 | if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0) |
162 | ksft_exit_fail_msg(msg: "pipe() failed\n" ); |
163 | |
164 | f = fopen("/proc/sys/kernel/pid_max" , "r" ); |
165 | if (f == NULL) |
166 | ksft_exit_fail_msg( |
167 | msg: "%s - Could not open /proc/sys/kernel/pid_max\n" , |
168 | strerror(errno)); |
169 | fscanf(f, "%d" , &pid_max); |
170 | fclose(f); |
171 | ksft_print_msg(msg: "/proc/sys/kernel/pid_max %d\n" , pid_max); |
172 | |
173 | /* Try invalid settings */ |
174 | memset(&set_tid, 0, sizeof(set_tid)); |
175 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
176 | |
177 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
178 | |
179 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, flags: 0, |
180 | expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
181 | |
182 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
183 | |
184 | /* |
185 | * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1 |
186 | * nested PID namespace. |
187 | */ |
188 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
189 | |
190 | memset(&set_tid, 0xff, sizeof(set_tid)); |
191 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
192 | |
193 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
194 | |
195 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, flags: 0, |
196 | expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
197 | |
198 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
199 | |
200 | /* |
201 | * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1 |
202 | * nested PID namespace. |
203 | */ |
204 | test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
205 | |
206 | memset(&set_tid, 0, sizeof(set_tid)); |
207 | /* Try with an invalid PID */ |
208 | set_tid[0] = 0; |
209 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
210 | |
211 | set_tid[0] = -1; |
212 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
213 | |
214 | /* Claim that the set_tid array actually contains 2 elements. */ |
215 | test_clone3_set_tid(set_tid, set_tid_size: 2, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
216 | |
217 | /* Try it in a new PID namespace */ |
218 | if (uid == 0) |
219 | test_clone3_set_tid(set_tid, set_tid_size: 1, CLONE_NEWPID, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
220 | else |
221 | ksft_test_result_skip(msg: "Clone3() with set_tid requires root\n" ); |
222 | |
223 | /* Try with a valid PID (1) this should return -EEXIST. */ |
224 | set_tid[0] = 1; |
225 | if (uid == 0) |
226 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: -EEXIST, expected_pid: 0, wait_for_it: 0); |
227 | else |
228 | ksft_test_result_skip(msg: "Clone3() with set_tid requires root\n" ); |
229 | |
230 | /* Try it in a new PID namespace */ |
231 | if (uid == 0) |
232 | test_clone3_set_tid(set_tid, set_tid_size: 1, CLONE_NEWPID, expected: 0, expected_pid: 0, wait_for_it: 0); |
233 | else |
234 | ksft_test_result_skip(msg: "Clone3() with set_tid requires root\n" ); |
235 | |
236 | /* pid_max should fail everywhere */ |
237 | set_tid[0] = pid_max; |
238 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
239 | |
240 | if (uid == 0) |
241 | test_clone3_set_tid(set_tid, set_tid_size: 1, CLONE_NEWPID, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
242 | else |
243 | ksft_test_result_skip(msg: "Clone3() with set_tid requires root\n" ); |
244 | |
245 | if (uid != 0) { |
246 | /* |
247 | * All remaining tests require root. Tell the framework |
248 | * that all those tests are skipped as non-root. |
249 | */ |
250 | ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num(); |
251 | goto out; |
252 | } |
253 | |
254 | /* Find the current active PID */ |
255 | pid = fork(); |
256 | if (pid == 0) { |
257 | ksft_print_msg(msg: "Child has PID %d\n" , getpid()); |
258 | child_exit(ret: EXIT_SUCCESS); |
259 | } |
260 | if (waitpid(pid, &status, 0) < 0) |
261 | ksft_exit_fail_msg(msg: "Waiting for child %d failed" , pid); |
262 | |
263 | /* After the child has finished, its PID should be free. */ |
264 | set_tid[0] = pid; |
265 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: 0, expected_pid: 0, wait_for_it: 0); |
266 | |
267 | /* This should fail as there is no PID 1 in that namespace */ |
268 | test_clone3_set_tid(set_tid, set_tid_size: 1, CLONE_NEWPID, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
269 | |
270 | /* |
271 | * Creating a process with PID 1 in the newly created most nested |
272 | * PID namespace and PID 'pid' in the parent PID namespace. This |
273 | * needs to work. |
274 | */ |
275 | set_tid[0] = 1; |
276 | set_tid[1] = pid; |
277 | test_clone3_set_tid(set_tid, set_tid_size: 2, CLONE_NEWPID, expected: 0, expected_pid: pid, wait_for_it: 0); |
278 | |
279 | ksft_print_msg(msg: "unshare PID namespace\n" ); |
280 | if (unshare(CLONE_NEWPID) == -1) |
281 | ksft_exit_fail_msg(msg: "unshare(CLONE_NEWPID) failed: %s\n" , |
282 | strerror(errno)); |
283 | |
284 | set_tid[0] = pid; |
285 | |
286 | /* This should fail as there is no PID 1 in that namespace */ |
287 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
288 | |
289 | /* Let's create a PID 1 */ |
290 | ns_pid = fork(); |
291 | if (ns_pid == 0) { |
292 | /* |
293 | * This and the next test cases check that all pid-s are |
294 | * released on error paths. |
295 | */ |
296 | set_tid[0] = 43; |
297 | set_tid[1] = -1; |
298 | test_clone3_set_tid(set_tid, set_tid_size: 2, flags: 0, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
299 | |
300 | set_tid[0] = 43; |
301 | set_tid[1] = pid; |
302 | test_clone3_set_tid(set_tid, set_tid_size: 2, flags: 0, expected: 0, expected_pid: 43, wait_for_it: 0); |
303 | |
304 | ksft_print_msg(msg: "Child in PID namespace has PID %d\n" , getpid()); |
305 | set_tid[0] = 2; |
306 | test_clone3_set_tid(set_tid, set_tid_size: 1, flags: 0, expected: 0, expected_pid: 2, wait_for_it: 0); |
307 | |
308 | set_tid[0] = 1; |
309 | set_tid[1] = -1; |
310 | set_tid[2] = pid; |
311 | /* This should fail as there is invalid PID at level '1'. */ |
312 | test_clone3_set_tid(set_tid, set_tid_size: 3, CLONE_NEWPID, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
313 | |
314 | set_tid[0] = 1; |
315 | set_tid[1] = 42; |
316 | set_tid[2] = pid; |
317 | /* |
318 | * This should fail as there are not enough active PID |
319 | * namespaces. Again assuming this is running in the host's |
320 | * PID namespace. Not yet nested. |
321 | */ |
322 | test_clone3_set_tid(set_tid, set_tid_size: 4, CLONE_NEWPID, expected: -EINVAL, expected_pid: 0, wait_for_it: 0); |
323 | |
324 | /* |
325 | * This should work and from the parent we should see |
326 | * something like 'NSpid: pid 42 1'. |
327 | */ |
328 | test_clone3_set_tid(set_tid, set_tid_size: 3, CLONE_NEWPID, expected: 0, expected_pid: 42, wait_for_it: true); |
329 | |
330 | child_exit(ret: ksft_cnt.ksft_fail); |
331 | } |
332 | |
333 | close(pipe_1[1]); |
334 | close(pipe_2[0]); |
335 | while (read(pipe_1[0], &buf, 1) > 0) { |
336 | ksft_print_msg(msg: "[%d] Child is ready and waiting\n" , getpid()); |
337 | break; |
338 | } |
339 | |
340 | snprintf(buf: proc_path, size: sizeof(proc_path), fmt: "/proc/%d/status" , pid); |
341 | f = fopen(proc_path, "r" ); |
342 | if (f == NULL) |
343 | ksft_exit_fail_msg( |
344 | msg: "%s - Could not open %s\n" , |
345 | strerror(errno), proc_path); |
346 | |
347 | while (getline(&line, &len, f) != -1) { |
348 | if (strstr(line, "NSpid" )) { |
349 | int i; |
350 | |
351 | /* Verify that all generated PIDs are as expected. */ |
352 | i = sscanf(line, "NSpid:\t%d\t%d\t%d" , |
353 | &ns3, &ns2, &ns1); |
354 | if (i != 3) { |
355 | ksft_print_msg( |
356 | msg: "Unexpected 'NSPid:' entry: %s" , |
357 | line); |
358 | ns1 = ns2 = ns3 = 0; |
359 | } |
360 | break; |
361 | } |
362 | } |
363 | fclose(f); |
364 | free(line); |
365 | close(pipe_2[0]); |
366 | |
367 | /* Tell the clone3()'d child to finish. */ |
368 | write(pipe_2[1], &buf, 1); |
369 | close(pipe_2[1]); |
370 | |
371 | if (waitpid(ns_pid, &status, 0) < 0) { |
372 | ksft_print_msg(msg: "Child returned %s\n" , strerror(errno)); |
373 | ret = -errno; |
374 | goto out; |
375 | } |
376 | |
377 | if (!WIFEXITED(status)) |
378 | ksft_test_result_fail(msg: "Child error\n" ); |
379 | |
380 | ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status)); |
381 | ksft_cnt.ksft_fail = WEXITSTATUS(status); |
382 | |
383 | if (ns3 == pid && ns2 == 42 && ns1 == 1) |
384 | ksft_test_result_pass( |
385 | msg: "PIDs in all namespaces as expected (%d,%d,%d)\n" , |
386 | ns3, ns2, ns1); |
387 | else |
388 | ksft_test_result_fail( |
389 | msg: "PIDs in all namespaces not as expected (%d,%d,%d)\n" , |
390 | ns3, ns2, ns1); |
391 | out: |
392 | ret = 0; |
393 | |
394 | return !ret ? ksft_exit_pass() : ksft_exit_fail(); |
395 | } |
396 | |