1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #define _GNU_SOURCE |
4 | #include <errno.h> |
5 | #include <fcntl.h> |
6 | #include <limits.h> |
7 | #include <linux/types.h> |
8 | #include <poll.h> |
9 | #include <sched.h> |
10 | #include <signal.h> |
11 | #include <stdio.h> |
12 | #include <stdlib.h> |
13 | #include <string.h> |
14 | #include <syscall.h> |
15 | #include <sys/prctl.h> |
16 | #include <sys/wait.h> |
17 | #include <unistd.h> |
18 | #include <sys/socket.h> |
19 | #include <linux/kcmp.h> |
20 | |
21 | #include "pidfd.h" |
22 | #include "../kselftest_harness.h" |
23 | |
/*
 * UNKNOWN_FD is an fd number that should never exist in the child, as it is
 * used to check the negative case.
 */
#define UNKNOWN_FD 111
/*
 * UID_NOBODY is the effective uid the disable_ptrace test switches to when
 * it is run as root.
 * NOTE(review): "nobody" is commonly uid 65534 on Linux; confirm that 65535
 * is the intended value here.
 */
#define UID_NOBODY 65535
30 | |
/*
 * Thin wrapper around the raw kcmp system call.
 *
 * All five arguments are forwarded unchanged to syscall(__NR_kcmp, ...).
 * The long returned by syscall() is narrowed to int for the caller.
 */
static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1,
		    unsigned long idx2)
{
	long rc;

	rc = syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);

	return (int)rc;
}
36 | |
/*
 * Main body of the child process once its memfd exists.
 *
 * @sk:    the child's end of the socketpair shared with the parent
 * @memfd: fd number (in this process) that is reported to the parent
 *
 * Sends the value of @memfd over @sk, then serves single-byte commands
 * received on @sk until the peer closes it.
 *
 * Returns 0 when recv() reports that the peer closed the socket, -1 on any
 * failure or on an unrecognised command (a diagnostic goes to stderr).
 */
static int __child(int sk, int memfd)
{
	char cmd;
	int rc;

	/*
	 * Ask to be sent SIGKILL when the parent dies, so that a failing test
	 * does not leave orphaned children behind.
	 */
	if (prctl(PR_SET_PDEATHSIG, SIGKILL)) {
		fprintf(stderr, "%s: Child could not set DEATHSIG\n",
			strerror(errno));
		return -1;
	}

	/* Tell the parent which fd number it should try to fetch. */
	rc = send(sk, &memfd, sizeof(memfd), 0);
	if (rc != sizeof(memfd)) {
		fprintf(stderr, "%s: Child failed to send fd number\n",
			strerror(errno));
		return -1;
	}

	/*
	 * Fixture setup is complete; the tests run from here on.
	 *
	 * Each blocking recv() yields either a command byte from the parent
	 * or 0 once the parent has closed its end of the socket, which
	 * happens at fixture teardown and tells the child to exit.
	 */
	for (;;) {
		rc = recv(sk, &cmd, sizeof(cmd), 0);
		if (rc < 0) {
			fprintf(stderr, "%s: Child failed to read from socket\n",
				strerror(errno));
			return -1;
		}
		if (rc == 0)
			return 0;

		/* 'P' is the only command understood. */
		if (cmd != 'P') {
			fprintf(stderr, "Child received unknown command %c\n",
				cmd);
			return -1;
		}

		/* 'P': clear the dumpable flag, which the test uses to disable ptrace. */
		if (prctl(PR_SET_DUMPABLE, 0) < 0) {
			fprintf(stderr,
				"%s: Child failed to disable ptrace\n",
				strerror(errno));
			return -1;
		}

		/* Echo the command byte back as an acknowledgement. */
		if (send(sk, &cmd, sizeof(cmd), 0) != 1) {
			fprintf(stderr, "%s: Child failed to ack\n",
				strerror(errno));
			return -1;
		}
	}
}
98 | |
/*
 * Entry point of the forked child.
 *
 * @sk: the child's end of the socketpair shared with the parent
 *
 * Creates a memfd named "test" and hands it to __child(). Both the memfd
 * (if it was created) and @sk are closed before returning.
 *
 * Returns the result of __child(), or -1 if the memfd could not be created.
 */
static int child(int sk)
{
	int status = -1;
	int memfd = sys_memfd_create("test", 0);

	if (memfd >= 0) {
		status = __child(sk, memfd);
		close(memfd);
	} else {
		fprintf(stderr, "%s: Child could not create memfd\n",
			strerror(errno));
	}

	close(sk);
	return status;
}
116 | |
/*
 * Per-test state for the "child" fixture. FIXTURE_SETUP(child) fills in the
 * socket, pid, pidfd and remote fd number before each TEST_F(child, ...) runs.
 */
FIXTURE(child)
{
	/*
	 * remote_fd is the number of the FD which we are trying to retrieve
	 * from the child.
	 */
	int remote_fd;
	/* pid is the PID of the child which we are fetching FDs from */
	pid_t pid;
	/* pidfd is the pidfd of the child */
	int pidfd;
	/*
	 * sk is our side of the socketpair used to communicate with the child.
	 * When it is closed, the child will exit.
	 */
	int sk;
	/*
	 * When true, teardown still calls wait_for_pid() on the child but
	 * does not check its result. Set by tests that kill the child
	 * themselves.
	 */
	bool ignore_child_result;
};
135 | |
136 | FIXTURE_SETUP(child) |
137 | { |
138 | int ret, sk_pair[2]; |
139 | |
140 | ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) { |
141 | TH_LOG("%s: failed to create socketpair" , strerror(errno)); |
142 | } |
143 | self->sk = sk_pair[0]; |
144 | |
145 | self->pid = fork(); |
146 | ASSERT_GE(self->pid, 0); |
147 | |
148 | if (self->pid == 0) { |
149 | close(sk_pair[0]); |
150 | if (child(sk: sk_pair[1])) |
151 | _exit(EXIT_FAILURE); |
152 | _exit(EXIT_SUCCESS); |
153 | } |
154 | |
155 | close(sk_pair[1]); |
156 | |
157 | self->pidfd = sys_pidfd_open(pid: self->pid, flags: 0); |
158 | ASSERT_GE(self->pidfd, 0); |
159 | |
160 | /* |
161 | * Wait for the child to complete setup. It'll send the remote memfd's |
162 | * number when ready. |
163 | */ |
164 | ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0); |
165 | ASSERT_EQ(sizeof(self->remote_fd), ret); |
166 | } |
167 | |
168 | FIXTURE_TEARDOWN(child) |
169 | { |
170 | int ret; |
171 | |
172 | EXPECT_EQ(0, close(self->pidfd)); |
173 | EXPECT_EQ(0, close(self->sk)); |
174 | |
175 | ret = wait_for_pid(pid: self->pid); |
176 | if (!self->ignore_child_result) |
177 | EXPECT_EQ(0, ret); |
178 | } |
179 | |
180 | TEST_F(child, disable_ptrace) |
181 | { |
182 | int uid, fd; |
183 | char c; |
184 | |
185 | /* |
186 | * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE |
187 | * |
188 | * The tests should run in their own process, so even this test fails, |
189 | * it shouldn't result in subsequent tests failing. |
190 | */ |
191 | uid = getuid(); |
192 | if (uid == 0) |
193 | ASSERT_EQ(0, seteuid(UID_NOBODY)); |
194 | |
195 | ASSERT_EQ(1, send(self->sk, "P" , 1, 0)); |
196 | ASSERT_EQ(1, recv(self->sk, &c, 1, 0)); |
197 | |
198 | fd = sys_pidfd_getfd(pidfd: self->pidfd, fd: self->remote_fd, flags: 0); |
199 | EXPECT_EQ(-1, fd); |
200 | EXPECT_EQ(EPERM, errno); |
201 | |
202 | if (uid == 0) |
203 | ASSERT_EQ(0, seteuid(0)); |
204 | } |
205 | |
206 | TEST_F(child, fetch_fd) |
207 | { |
208 | int fd, ret; |
209 | |
210 | fd = sys_pidfd_getfd(pidfd: self->pidfd, fd: self->remote_fd, flags: 0); |
211 | ASSERT_GE(fd, 0); |
212 | |
213 | ret = sys_kcmp(pid1: getpid(), pid2: self->pid, type: KCMP_FILE, idx1: fd, idx2: self->remote_fd); |
214 | if (ret < 0 && errno == ENOSYS) |
215 | SKIP(return, "kcmp() syscall not supported" ); |
216 | EXPECT_EQ(ret, 0); |
217 | |
218 | ret = fcntl(fd, F_GETFD); |
219 | ASSERT_GE(ret, 0); |
220 | EXPECT_GE(ret & FD_CLOEXEC, 0); |
221 | |
222 | close(fd); |
223 | } |
224 | |
225 | TEST_F(child, test_unknown_fd) |
226 | { |
227 | int fd; |
228 | |
229 | fd = sys_pidfd_getfd(pidfd: self->pidfd, UNKNOWN_FD, flags: 0); |
230 | EXPECT_EQ(-1, fd) { |
231 | TH_LOG("getfd succeeded while fetching unknown fd" ); |
232 | }; |
233 | EXPECT_EQ(EBADF, errno) { |
234 | TH_LOG("%s: getfd did not get EBADF" , strerror(errno)); |
235 | } |
236 | } |
237 | |
/*
 * Passing a non-zero value as the third (flags) argument is expected to fail
 * with EINVAL. This test needs no child, so it does not use the fixture.
 */
TEST(flags_set)
{
	ASSERT_EQ(-1, sys_pidfd_getfd(0, 0, 1));
	EXPECT_EQ(errno, EINVAL);
}
243 | |
244 | TEST_F(child, no_strange_EBADF) |
245 | { |
246 | struct pollfd fds; |
247 | |
248 | self->ignore_child_result = true; |
249 | |
250 | fds.fd = self->pidfd; |
251 | fds.events = POLLIN; |
252 | |
253 | ASSERT_EQ(kill(self->pid, SIGKILL), 0); |
254 | ASSERT_EQ(poll(&fds, 1, 5000), 1); |
255 | |
256 | /* |
257 | * It used to be that pidfd_getfd() could race with the exiting thread |
258 | * between exit_files() and release_task(), and get a non-null task |
259 | * with a NULL files struct, and you'd get EBADF, which was slightly |
260 | * confusing. |
261 | */ |
262 | errno = 0; |
263 | EXPECT_EQ(sys_pidfd_getfd(self->pidfd, self->remote_fd, 0), -1); |
264 | EXPECT_EQ(errno, ESRCH); |
265 | } |
266 | |
267 | #if __NR_pidfd_getfd == -1 |
268 | int main(void) |
269 | { |
270 | fprintf(stderr, "__NR_pidfd_getfd undefined. The pidfd_getfd syscall is unavailable. Test aborting\n" ); |
271 | return KSFT_SKIP; |
272 | } |
273 | #else |
274 | TEST_HARNESS_MAIN |
275 | #endif |
276 | |