| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2020 Intel Corporation. |
| 4 | ** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com |
| 5 | ** |
| 6 | ** Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | ** of this software and associated documentation files (the "Software"), to deal |
| 8 | ** in the Software without restriction, including without limitation the rights |
| 9 | ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | ** copies of the Software, and to permit persons to whom the Software is |
| 11 | ** furnished to do so, subject to the following conditions: |
| 12 | ** |
| 13 | ** The above copyright notice and this permission notice shall be included in |
| 14 | ** all copies or substantial portions of the Software. |
| 15 | ** |
| 16 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 22 | ** THE SOFTWARE. |
| 23 | ** |
| 24 | ****************************************************************************/ |
| 25 | |
| 26 | #ifndef _GNU_SOURCE |
| 27 | # define _GNU_SOURCE |
| 28 | #endif |
| 29 | |
| 30 | #include "forkfd.h" |
| 31 | |
| 32 | /* Macros fine-tuning the build: */ |
| 33 | //#define FORKFD_NO_FORKFD 1 /* disable the forkfd() function */ |
| 34 | //#define FORKFD_NO_SPAWNFD 1 /* disable the spawnfd() function */ |
| 35 | //#define FORKFD_DISABLE_FORK_FALLBACK 1 /* disable falling back to fork() from system_forkfd() */ |
| 36 | |
| 37 | #include <sys/types.h> |
| 38 | #if defined(__OpenBSD__) || defined(__NetBSD__) |
| 39 | # include <sys/param.h> |
| 40 | #endif |
| 41 | #include <sys/time.h> |
| 42 | #include <sys/resource.h> |
| 43 | #include <sys/wait.h> |
| 44 | #include <assert.h> |
| 45 | #include <errno.h> |
| 46 | #include <pthread.h> |
| 47 | #include <signal.h> |
| 48 | #include <stdlib.h> |
| 49 | #include <string.h> |
| 50 | #include <time.h> |
| 51 | #include <unistd.h> |
| 52 | |
| 53 | #ifdef __linux__ |
| 54 | # define HAVE_WAIT4 1 |
| 55 | # if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x208 && \ |
| 56 | (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201))) |
| 57 | # include <sys/eventfd.h> |
| 58 | # ifdef EFD_CLOEXEC |
| 59 | # define HAVE_EVENTFD 1 |
| 60 | # endif |
| 61 | # endif |
| 62 | # if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x209 && \ |
| 63 | (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201))) |
| 64 | # define HAVE_PIPE2 1 |
| 65 | # endif |
| 66 | #endif |
| 67 | |
| 68 | #if _POSIX_VERSION-0 >= 200809L || _XOPEN_VERSION-0 >= 500 |
| 69 | # define HAVE_WAITID 1 |
| 70 | #endif |
| 71 | #if !defined(WEXITED) || !defined(WNOWAIT) |
| 72 | # undef HAVE_WAITID |
| 73 | #endif |
| 74 | |
| 75 | #if (defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1000032) || \ |
| 76 | (defined(__OpenBSD__) && OpenBSD >= 201505) || \ |
| 77 | (defined(__NetBSD__) && __NetBSD_Version__ >= 600000000) |
| 78 | # define HAVE_PIPE2 1 |
| 79 | #endif |
| 80 | #if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) || \ |
| 81 | defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__) |
| 82 | # define HAVE_WAIT4 1 |
| 83 | #endif |
| 84 | |
| 85 | #if defined(__APPLE__) |
| 86 | /* Up until OS X 10.7, waitid(P_ALL, ...) will return success, but will not |
| 87 | * fill in the details of the dead child. That means waitid is not useful to us. |
| 88 | * Therefore, we only enable waitid() support if we're targeting OS X 10.8 or |
| 89 | * later. |
| 90 | */ |
| 91 | # include <Availability.h> |
| 92 | # include <AvailabilityMacros.h> |
| 93 | # if MAC_OS_X_VERSION_MIN_REQUIRED <= 1070 |
| 94 | # define HAVE_BROKEN_WAITID 1 |
| 95 | # endif |
| 96 | #endif |
| 97 | |
| 98 | #include "forkfd_atomic.h" |
| 99 | |
| 100 | static int system_has_forkfd(void); |
| 101 | static int system_forkfd(int flags, pid_t *ppid, int *system); |
| 102 | static int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdwoptions, struct rusage *rusage); |
| 103 | |
| 104 | static int disable_fork_fallback(void) |
| 105 | { |
| 106 | #ifdef FORKFD_DISABLE_FORK_FALLBACK |
| 107 | /* if there's no system forkfd, we have to use the fallback */ |
| 108 | return system_has_forkfd(); |
| 109 | #else |
| 110 | return false; |
| 111 | #endif |
| 112 | } |
| 113 | |
| 114 | #define CHILDREN_IN_SMALL_ARRAY 16 |
| 115 | #define CHILDREN_IN_BIG_ARRAY 256 |
| 116 | #define sizeofarray(array) (sizeof(array)/sizeof(array[0])) |
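|  | /* retry a call while it keeps failing with EINTR (interrupted by a signal) */ |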
| 117 | #define EINTR_LOOP(ret, call) \ |
| 118 | do { \ |
| 119 | ret = call; \ |
| 120 | } while (ret == -1 && errno == EINTR) |
| 121 | |
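|  | /* record written to a watcher's death pipe once its child has been reaped */ |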
| 122 | struct pipe_payload |
| 123 | { |
| 124 | struct forkfd_info info; |
| 125 | struct rusage rusage; |
| 126 | }; |
| 127 | |
| 128 | typedef struct process_info |
| 129 | { |
| 130 | ffd_atomic_int pid; |
| 131 | int deathPipe; |
| 132 | } ProcessInfo; |
| 133 | |
| 134 | struct BigArray; |
| 135 | typedef struct |
| 136 | { |
| 137 | ffd_atomic_pointer(struct BigArray) nextArray; |
| 138 | ffd_atomic_int busyCount; |
| 139 | } Header; |
| 140 | |
| 141 | typedef struct BigArray |
| 142 | { |
| 143 | Header header; |
| 144 | ProcessInfo entries[CHILDREN_IN_BIG_ARRAY]; |
| 145 | } BigArray; |
| 146 | |
| 147 | typedef struct SmallArray |
| 148 | { |
| 149 | Header header; |
| 150 | ProcessInfo entries[CHILDREN_IN_SMALL_ARRAY]; |
| 151 | } SmallArray; |
| 152 | static SmallArray children; |
| 153 | |
| 154 | static struct sigaction old_sigaction; |
| 155 | static pthread_once_t forkfd_initialization = PTHREAD_ONCE_INIT; |
| 156 | static ffd_atomic_int forkfd_status = FFD_ATOMIC_INIT(0); |
| 157 | |
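|  | /* whether waitid() reliably reports exited children; on Apple systems with |
|  | * the broken waitid() this is probed at runtime in forkfd_initialize() */ |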
| 158 | #ifdef HAVE_BROKEN_WAITID |
| 159 | static int waitid_works = 0; |
| 160 | #else |
| 161 | static const int waitid_works = 1; |
| 162 | #endif |
| 163 | |
| 164 | static ProcessInfo *tryAllocateInSection(Header *header, ProcessInfo entries[], int maxCount) |
| 165 | { |
| 166 | /* we use ACQUIRE here because the signal handler might have released the PID */ |
| 167 | int busyCount = ffd_atomic_add_fetch(&header->busyCount, 1, FFD_ATOMIC_ACQUIRE); |
| 168 | if (busyCount <= maxCount) { |
| 169 | /* there's an available entry in this section, find it and take it */ |
| 170 | int i; |
| 171 | for (i = 0; i < maxCount; ++i) { |
| 172 | /* if the PID is 0, it's free; mark it as used by swapping it with -1 */ |
| 173 | int expected_pid = 0; |
| 174 | if (ffd_atomic_compare_exchange(&entries[i].pid, &expected_pid, |
| 175 | -1, FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED)) |
| 176 | return &entries[i]; |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | /* there isn't an available entry, undo our increment */ |
| 181 | (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELAXED); |
| 182 | return NULL; |
| 183 | } |
| 184 | |
| 185 | static ProcessInfo *allocateInfo(Header **header) |
| 186 | { |
| 187 | Header *currentHeader = &children.header; |
| 188 | |
| 189 | /* try to find an available entry in the small array first */ |
| 190 | ProcessInfo *info = |
| 191 | tryAllocateInSection(currentHeader, children.entries, sizeofarray(children.entries)); |
| 192 | |
| 193 | /* go on to the next arrays */ |
| 194 | while (info == NULL) { |
| 195 | BigArray *array = ffd_atomic_load(&currentHeader->nextArray, FFD_ATOMIC_ACQUIRE); |
| 196 | if (array == NULL) { |
| 197 | /* allocate an array and try to use it */ |
| 198 | BigArray *allocatedArray = (BigArray *)calloc(1, sizeof(BigArray)); |
| 199 | if (allocatedArray == NULL) |
| 200 | return NULL; |
| 201 | |
| 202 | if (ffd_atomic_compare_exchange(&currentHeader->nextArray, &array, allocatedArray, |
| 203 | FFD_ATOMIC_RELEASE, FFD_ATOMIC_ACQUIRE)) { |
| 204 | /* success */ |
| 205 | array = allocatedArray; |
| 206 | } else { |
| 207 | /* failed, the atomic updated 'array' */ |
| 208 | free(allocatedArray); |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | currentHeader = &array->header; |
| 213 | info = tryAllocateInSection(currentHeader, array->entries, sizeofarray(array->entries)); |
| 214 | } |
| 215 | |
| 216 | *header = currentHeader; |
| 217 | return info; |
| 218 | } |
| 219 | |
| 220 | #ifdef HAVE_WAITID |
| 221 | static int isChildReady(pid_t pid, siginfo_t *info) |
| 222 | { |
| 223 | info->si_pid = 0; |
| 224 | return waitid(P_PID, pid, info, WEXITED | WNOHANG | WNOWAIT) == 0 && info->si_pid == pid; |
| 225 | } |
| 226 | #endif |
| 227 | |
| 228 | static void convertStatusToForkfdInfo(int status, struct forkfd_info *info) |
| 229 | { |
| 230 | if (WIFEXITED(status)) { |
| 231 | info->code = CLD_EXITED; |
| 232 | info->status = WEXITSTATUS(status); |
| 233 | } else if (WIFSIGNALED(status)) { |
| 234 | info->code = CLD_KILLED; |
| 235 | # ifdef WCOREDUMP |
| 236 | if (WCOREDUMP(status)) |
| 237 | info->code = CLD_DUMPED; |
| 238 | # endif |
| 239 | info->status = WTERMSIG(status); |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions) |
| 244 | { |
| 245 | int woptions = WEXITED; |
| 246 | if (ffdoptions & FFDW_NOWAIT) |
| 247 | woptions |= WNOWAIT; |
| 248 | if (ffdoptions & FFDW_NOHANG) |
| 249 | woptions |= WNOHANG; |
| 250 | return woptions; |
| 251 | } |
| 252 | |
| 253 | static int tryReaping(pid_t pid, struct pipe_payload *payload) |
| 254 | { |
| 255 | /* reap the child */ |
| 256 | #if defined(HAVE_WAIT4) |
| 257 | int status; |
| 258 | if (wait4(pid, &status, WNOHANG, &payload->rusage) <= 0) |
| 259 | return 0; |
| 260 | convertStatusToForkfdInfo(status, &payload->info); |
| 261 | #else |
| 262 | # if defined(HAVE_WAITID) |
| 263 | if (waitid_works) { |
| 264 | /* we have waitid(2), which gets us some payload values on some systems */ |
| 265 | siginfo_t info; |
| 266 | info.si_pid = 0; |
| 267 | int ret = waitid(P_PID, pid, &info, WEXITED | WNOHANG) == 0 && info.si_pid == pid; |
| 268 | if (!ret) |
| 269 | return ret; |
| 270 | |
| 271 | payload->info.code = info.si_code; |
| 272 | payload->info.status = info.si_status; |
| 273 | # ifdef __linux__ |
| 274 | payload->rusage.ru_utime.tv_sec = info.si_utime / CLOCKS_PER_SEC; |
| 275 | payload->rusage.ru_utime.tv_usec = info.si_utime % CLOCKS_PER_SEC; |
| 276 | payload->rusage.ru_stime.tv_sec = info.si_stime / CLOCKS_PER_SEC; |
| 277 | payload->rusage.ru_stime.tv_usec = info.si_stime % CLOCKS_PER_SEC; |
| 278 | # endif |
| 279 | return 1; |
| 280 | } |
| 281 | # endif // HAVE_WAITID |
| 282 | int status; |
| 283 | if (waitpid(pid, &status, WNOHANG) <= 0) |
| 284 | return 0; // child did not change state |
| 285 | convertStatusToForkfdInfo(status, &payload->info); |
| 286 | #endif // !HAVE_WAIT4 |
| 287 | |
| 288 | return 1; |
| 289 | } |
| 290 | |
| 291 | static void freeInfo(Header *header, ProcessInfo *entry) |
| 292 | { |
| 293 | entry->deathPipe = -1; |
| 294 | ffd_atomic_store(&entry->pid, 0, FFD_ATOMIC_RELEASE); |
| 295 | |
| 296 | (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELEASE); |
| 297 | assert(header->busyCount >= 0); |
| 298 | } |
| 299 | |
| 300 | static void notifyAndFreeInfo(Header *header, ProcessInfo *entry, |
| 301 | const struct pipe_payload *payload) |
| 302 | { |
| 303 | ssize_t ret; |
| 304 | EINTR_LOOP(ret, write(entry->deathPipe, payload, sizeof(*payload))); |
| 305 | EINTR_LOOP(ret, close(entry->deathPipe)); |
| 306 | |
| 307 | freeInfo(header, entry); |
| 308 | } |
| 309 | |
| 310 | static void reapChildProcesses(); |
| 311 | static void sigchld_handler(int signum, siginfo_t *handler_info, void *handler_context) |
| 312 | { |
| 313 | /* |
| 314 | * This is a signal handler, so we need to be careful about which functions |
| 315 | * we can call. See the full, official listing in the POSIX.1-2008 |
| 316 | * specification at: |
| 317 | * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03 |
| 318 | * |
| 319 | * The handler_info and handler_context parameters may not be valid, if |
| 320 | * we're a chained handler from another handler that did not use |
| 321 | * SA_SIGINFO. Therefore, we must obtain the siginfo ourselves directly by |
| 322 | * calling waitid. |
| 323 | * |
| 324 | * But we pass them anyway. Let's call the chained handler first, while |
| 325 | * those two arguments have a chance of being correct. |
| 326 | */ |
| 327 | if (old_sigaction.sa_handler != SIG_IGN && old_sigaction.sa_handler != SIG_DFL) { |
| 328 | if (old_sigaction.sa_flags & SA_SIGINFO) |
| 329 | old_sigaction.sa_sigaction(signum, handler_info, handler_context); |
| 330 | else |
| 331 | old_sigaction.sa_handler(signum); |
| 332 | } |
| 333 | |
| 334 | if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 1) { |
| 335 | int saved_errno = errno; |
| 336 | reapChildProcesses(); |
| 337 | errno = saved_errno; |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | static inline void reapChildProcesses() |
| 342 | { |
| 343 | /* is this one of our children? */ |
| 344 | BigArray *array; |
| 345 | siginfo_t info; |
| 346 | struct pipe_payload payload; |
| 347 | int i; |
| 348 | |
| 349 | memset(&info, 0, sizeof info); |
| 350 | memset(&payload, 0, sizeof payload); |
| 351 | |
| 352 | #ifdef HAVE_WAITID |
| 353 | if (waitid_works) { |
| 354 | /* be optimistic: try to see if we can get the child that exited */ |
| 355 | search_next_child: |
| 356 | /* waitid returns -1 ECHILD if there are no further children at all; |
| 357 | * it returns 0 and sets si_pid to 0 if there are children but they are not ready |
| 358 | * to be waited (we're passing WNOHANG). We should not get EINTR because |
| 359 | * we're passing WNOHANG and we should definitely not get EINVAL or anything else. |
| 360 | * That means we can actually ignore the return code and only inspect si_pid. |
| 361 | */ |
| 362 | info.si_pid = 0; |
| 363 | waitid(P_ALL, 0, &info, WNOHANG | WNOWAIT | WEXITED); |
| 364 | if (info.si_pid == 0) { |
| 365 | /* there are no further un-waited-for children, so we can just exit. |
| 366 | */ |
| 367 | return; |
| 368 | } |
| 369 | |
| 370 | for (i = 0; i < (int)sizeofarray(children.entries); ++i) { |
| 371 | /* acquire the child first: swap the PID with -1 to indicate it's busy */ |
| 372 | int pid = info.si_pid; |
| 373 | if (ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1, |
| 374 | FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) { |
| 375 | /* this is our child, send notification and free up this entry */ |
| 376 | /* ### FIXME: what if tryReaping returns false? */ |
| 377 | if (tryReaping(pid, &payload)) |
| 378 | notifyAndFreeInfo(&children.header, &children.entries[i], &payload); |
| 379 | goto search_next_child; |
| 380 | } |
| 381 | } |
| 382 | |
| 383 | /* try the arrays */ |
| 384 | array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 385 | while (array != NULL) { |
| 386 | for (i = 0; i < (int)sizeofarray(array->entries); ++i) { |
| 387 | int pid = info.si_pid; |
| 388 | if (ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1, |
| 389 | FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) { |
| 390 | /* this is our child, send notification and free up this entry */ |
| 391 | /* ### FIXME: what if tryReaping returns false? */ |
| 392 | if (tryReaping(pid, &payload)) |
| 393 | notifyAndFreeInfo(&array->header, &array->entries[i], &payload); |
| 394 | goto search_next_child; |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 399 | } |
| 400 | |
| 401 | /* if we got here, we couldn't find this child in our list. That means this child |
| 402 | * belongs to one of the chained SIGCHLD handlers. However, there might be another |
| 403 | * child that exited and does belong to us, so we need to check each one individually. |
| 404 | */ |
| 405 | } |
| 406 | #endif |
| 407 | |
| 408 | for (i = 0; i < (int)sizeofarray(children.entries); ++i) { |
| 409 | int pid = ffd_atomic_load(&children.entries[i].pid, FFD_ATOMIC_ACQUIRE); |
| 410 | if (pid <= 0) |
| 411 | continue; |
| 412 | #ifdef HAVE_WAITID |
| 413 | if (waitid_works) { |
| 414 | /* The child might have been reaped by the block above in another thread, |
| 415 | * so first check if it's ready and, if it is, lock it */ |
| 416 | if (!isChildReady(pid, &info) || |
| 417 | !ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1, |
| 418 | FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED)) |
| 419 | continue; |
| 420 | } |
| 421 | #endif |
| 422 | if (tryReaping(pid, &payload)) { |
| 423 | /* this is our child, send notification and free up this entry */ |
| 424 | notifyAndFreeInfo(&children.header, &children.entries[i], &payload); |
| 425 | } |
| 426 | } |
| 427 | |
| 428 | /* try the arrays */ |
| 429 | array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 430 | while (array != NULL) { |
| 431 | for (i = 0; i < (int)sizeofarray(array->entries); ++i) { |
| 432 | int pid = ffd_atomic_load(&array->entries[i].pid, FFD_ATOMIC_ACQUIRE); |
| 433 | if (pid <= 0) |
| 434 | continue; |
| 435 | #ifdef HAVE_WAITID |
| 436 | if (waitid_works) { |
| 437 | /* The child might have been reaped by the block above in another thread, |
| 438 | * so first check if it's ready and, if it is, lock it */ |
| 439 | if (!isChildReady(pid, &info) || |
| 440 | !ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1, |
| 441 | FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED)) |
| 442 | continue; |
| 443 | } |
| 444 | #endif |
| 445 | if (tryReaping(pid, &payload)) { |
| 446 | /* this is our child, send notification and free up this entry */ |
| 447 | notifyAndFreeInfo(&array->header, &array->entries[i], &payload); |
| 448 | } |
| 449 | } |
| 450 | |
| 451 | array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 452 | } |
| 453 | } |
| 454 | |
| 455 | static void ignore_sigpipe() |
| 456 | { |
| 457 | #ifdef O_NOSIGPIPE |
| 458 | static ffd_atomic_int done = FFD_ATOMIC_INIT(0); |
| 459 | if (ffd_atomic_load(&done, FFD_ATOMIC_RELAXED)) |
| 460 | return; |
| 461 | #endif |
| 462 | |
| 463 | struct sigaction action; |
| 464 | memset(&action, 0, sizeof action); |
| 465 | sigemptyset(&action.sa_mask); |
| 466 | action.sa_handler = SIG_IGN; |
| 467 | action.sa_flags = 0; |
| 468 | sigaction(SIGPIPE, &action, NULL); |
| 469 | |
| 470 | #ifdef O_NOSIGPIPE |
| 471 | ffd_atomic_store(&done, 1, FFD_ATOMIC_RELAXED); |
| 472 | #endif |
| 473 | } |
| 474 | |
| 475 | #if defined(__GNUC__) && (!defined(__FreeBSD__) || __FreeBSD__ < 10) |
| 476 | __attribute((destructor, unused)) static void cleanup(); |
| 477 | #endif |
| 478 | |
| 479 | static void cleanup() |
| 480 | { |
| 481 | BigArray *array; |
| 482 | /* This function is not thread-safe! |
| 483 | * It must only be called when the process is shutting down. |
| 484 | * At shutdown, we expect no one to be calling forkfd(), so we don't |
| 485 | * need to be thread-safe with what is done there. |
| 486 | * |
| 487 | * But SIGCHLD might be delivered to any thread, including this one. |
| 488 | * There's no way to prevent that. The correct solution would be to |
| 489 | * cooperatively delete. We don't do that. |
| 490 | */ |
| 491 | if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 0) |
| 492 | return; |
| 493 | |
| 494 | /* notify the handler that we're no longer in operation */ |
| 495 | ffd_atomic_store(&forkfd_status, 0, FFD_ATOMIC_RELAXED); |
| 496 | |
| 497 | /* free any arrays we might have */ |
| 498 | array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 499 | while (array != NULL) { |
| 500 | BigArray *next = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 501 | free(array); |
| 502 | array = next; |
| 503 | } |
| 504 | } |
| 505 | |
| 506 | static void forkfd_initialize() |
| 507 | { |
| 508 | #if defined(HAVE_BROKEN_WAITID) |
| 509 | pid_t pid = fork(); |
| 510 | if (pid == 0) { |
| 511 | _exit(0); |
| 512 | } else if (pid > 0) { |
| 513 | siginfo_t info; |
| 514 | waitid(P_ALL, 0, &info, WNOWAIT | WEXITED); |
| 515 | waitid_works = (info.si_pid != 0); |
| 516 | info.si_pid = 0; |
| 517 | |
| 518 | // now really reap the child |
| 519 | waitid(P_PID, pid, &info, WEXITED); |
| 520 | waitid_works = waitid_works && (info.si_pid != 0); |
| 521 | } |
| 522 | #endif |
| 523 | |
| 524 | /* install our signal handler */ |
| 525 | struct sigaction action; |
| 526 | memset(&action, 0, sizeof action); |
| 527 | sigemptyset(&action.sa_mask); |
| 528 | action.sa_flags = SA_NOCLDSTOP | SA_SIGINFO; |
| 529 | action.sa_sigaction = sigchld_handler; |
| 530 | |
| 531 | /* ### RACE CONDITION |
| 532 | * The sigaction function does a memcpy from an internal buffer |
| 533 | * to old_sigaction, which we use in the SIGCHLD handler. If a |
| 534 | * SIGCHLD is delivered before or during that memcpy, the handler will |
| 535 | * see an inconsistent state. |
| 536 | * |
| 537 | * There is no solution. pthread_sigmask doesn't work here because the |
| 538 | * signal could be delivered to another thread. |
| 539 | */ |
| 540 | sigaction(SIGCHLD, &action, &old_sigaction); |
| 541 | |
| 542 | #ifndef O_NOSIGPIPE |
| 543 | /* disable SIGPIPE too */ |
| 544 | ignore_sigpipe(); |
| 545 | #endif |
| 546 | |
| 547 | #ifdef __GNUC__ |
| 548 | (void) cleanup; /* suppress unused static function warning */ |
| 549 | #else |
| 550 | atexit(cleanup); |
| 551 | #endif |
| 552 | |
| 553 | ffd_atomic_store(&forkfd_status, 1, FFD_ATOMIC_RELAXED); |
| 554 | } |
| 555 | |
| 556 | static int create_pipe(int filedes[], int flags) |
| 557 | { |
| 558 | int ret = -1; |
| 559 | #ifdef HAVE_PIPE2 |
| 560 | /* use pipe2(2) whenever possible, since it can thread-safely create a |
| 561 | * cloexec pair of pipes. Without it, we have a race condition setting |
| 562 | * FD_CLOEXEC |
| 563 | */ |
| 564 | |
| 565 | # ifdef O_NOSIGPIPE |
| 566 | /* try first with O_NOSIGPIPE */ |
| 567 | ret = pipe2(filedes, O_CLOEXEC | O_NOSIGPIPE); |
| 568 | if (ret == -1) { |
| 569 | /* O_NOSIGPIPE not supported, ignore SIGPIPE */ |
| 570 | ignore_sigpipe(); |
| 571 | } |
| 572 | # endif |
| 573 | if (ret == -1) |
| 574 | ret = pipe2(filedes, O_CLOEXEC); |
| 575 | if (ret == -1) |
| 576 | return ret; |
| 577 | |
| 578 | if ((flags & FFD_CLOEXEC) == 0) |
| 579 | fcntl(filedes[0], F_SETFD, 0); |
| 580 | #else |
| 581 | ret = pipe(filedes); |
| 582 | if (ret == -1) |
| 583 | return ret; |
| 584 | |
| 585 | fcntl(filedes[1], F_SETFD, FD_CLOEXEC); |
| 586 | if (flags & FFD_CLOEXEC) |
| 587 | fcntl(filedes[0], F_SETFD, FD_CLOEXEC); |
| 588 | #endif |
| 589 | if (flags & FFD_NONBLOCK) |
| 590 | fcntl(filedes[0], F_SETFL, fcntl(filedes[0], F_GETFL) | O_NONBLOCK); |
| 591 | return ret; |
| 592 | } |
| 593 | |
| 594 | #ifndef FORKFD_NO_FORKFD |
| 595 | /** |
| 596 | * @brief forkfd returns a file descriptor representing a child process |
| 597 | * @return a file descriptor, or -1 in case of failure |
| 598 | * |
| 599 | * forkfd() creates a file descriptor that can be used to be notified of when a |
| 600 | * child process exits. This file descriptor can be monitored using select(2), |
| 601 | * poll(2) or similar mechanisms. |
| 602 | * |
| 603 | * The @a flags parameter can contain the following values ORed to change the |
| 604 | * behaviour of forkfd(): |
| 605 | * |
| 606 | * @li @c FFD_NONBLOCK Set the O_NONBLOCK file status flag on the new open file |
| 607 | * descriptor. Using this flag saves extra calls to fcntl(2) to achieve the same |
| 608 | * result. |
| 609 | * |
| 610 | * @li @c FFD_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file |
| 611 | * descriptor. You probably want to set this flag, since forkfd() does not work |
| 612 | * if the original parent process dies. |
| 613 | * |
| 614 | * @li @c FFD_USE_FORK Tell forkfd() to actually call fork() instead of a |
| 615 | * different system implementation that may be available. On systems where a |
| 616 | * different implementation is available, its behavior may differ from that of |
| 617 | * fork(), such as not calling the functions registered with pthread_atfork(). |
| 618 | * If that's necessary, pass this flag. |
| 619 | * |
| 620 | * The file descriptor returned by forkfd() supports the following operations: |
| 621 | * |
| 622 | * @li read(2) When the child process exits, then the buffer supplied to |
| 623 | * read(2) is used to return information about the status of the child in the |
| 624 | * form of one @c siginfo_t structure. The buffer must be at least |
| 625 | * sizeof(siginfo_t) bytes. The return value of read(2) is the total number of |
| 626 | * bytes read. |
| 627 | * |
| 628 | * @li poll(2), select(2) (and similar) The file descriptor is readable (the |
| 629 | * select(2) readfds argument; the poll(2) POLLIN flag) if the child has exited |
| 630 | * or signalled via SIGCHLD. |
| 631 | * |
| 632 | * @li close(2) When the file descriptor is no longer required it should be closed. |
| 633 | */ |
| 634 | int forkfd(int flags, pid_t *ppid) |
| 635 | { |
| 636 | Header *header; |
| 637 | ProcessInfo *info; |
| 638 | pid_t pid; |
| 639 | int fd = -1; |
| 640 | int death_pipe[2]; |
| 641 | int sync_pipe[2]; |
| 642 | int ret; |
| 643 | #ifdef __linux__ |
| 644 | int efd; |
| 645 | #endif |
| 646 | |
| 647 | if (disable_fork_fallback()) |
| 648 | flags &= ~FFD_USE_FORK; |
| 649 | |
| 650 | if ((flags & FFD_USE_FORK) == 0) { |
| 651 | fd = system_forkfd(flags, ppid, &ret); |
| 652 | if (ret || disable_fork_fallback()) |
| 653 | return fd; |
| 654 | } |
| 655 | |
| 656 | (void) pthread_once(&forkfd_initialization, forkfd_initialize); |
| 657 | |
| 658 | info = allocateInfo(&header); |
| 659 | if (info == NULL) { |
| 660 | errno = ENOMEM; |
| 661 | return -1; |
| 662 | } |
| 663 | |
| 664 | /* create the pipes before we fork */ |
| 665 | if (create_pipe(death_pipe, flags) == -1) |
| 666 | goto err_free; /* failed to create the pipes, pass errno */ |
| 667 | |
| 668 | #ifdef HAVE_EVENTFD |
| 669 | /* try using an eventfd, which consumes less resources */ |
| 670 | efd = eventfd(0, EFD_CLOEXEC); |
| 671 | if (efd == -1) |
| 672 | #endif |
| 673 | { |
| 674 | /* try a pipe */ |
| 675 | if (create_pipe(sync_pipe, FFD_CLOEXEC) == -1) { |
| 676 | /* failed both at eventfd and pipe; fail and pass errno */ |
| 677 | goto err_close; |
| 678 | } |
| 679 | } |
| 680 | |
| 681 | /* now fork */ |
| 682 | pid = fork(); |
| 683 | if (pid == -1) |
| 684 | goto err_close2; /* failed to fork, pass errno */ |
| 685 | if (ppid) |
| 686 | *ppid = pid; |
| 687 | |
| 688 | /* |
| 689 | * We need to store the child's PID in the info structure, so |
| 690 | * the SIGCHLD handler knows that this child is present and it |
| 691 | * knows the writing end of the pipe to pass information on. |
| 692 | * However, the child process could exit before we stored the |
| 693 | * information (or the handler could run for other children exiting). |
| 694 | * We prevent that from happening by blocking the child process in |
| 695 | * a read(2) until we're finished storing the information. |
| 696 | */ |
| 697 | if (pid == 0) { |
| 698 | /* this is the child process */ |
| 699 | /* first, wait for the all clear */ |
| 700 | #ifdef HAVE_EVENTFD |
| 701 | if (efd != -1) { |
| 702 | eventfd_t val64; |
| 703 | EINTR_LOOP(ret, eventfd_read(efd, &val64)); |
| 704 | EINTR_LOOP(ret, close(efd)); |
| 705 | } else |
| 706 | #endif |
| 707 | { |
| 708 | char c; |
| 709 | EINTR_LOOP(ret, close(sync_pipe[1])); |
| 710 | EINTR_LOOP(ret, read(sync_pipe[0], &c, sizeof c)); |
| 711 | EINTR_LOOP(ret, close(sync_pipe[0])); |
| 712 | } |
| 713 | |
| 714 | /* now close the pipes and return to the caller */ |
| 715 | EINTR_LOOP(ret, close(death_pipe[0])); |
| 716 | EINTR_LOOP(ret, close(death_pipe[1])); |
| 717 | fd = FFD_CHILD_PROCESS; |
| 718 | } else { |
| 719 | /* parent process */ |
| 720 | info->deathPipe = death_pipe[1]; |
| 721 | fd = death_pipe[0]; |
| 722 | ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE); |
| 723 | |
| 724 | /* release the child */ |
| 725 | #ifdef HAVE_EVENTFD |
| 726 | if (efd != -1) { |
| 727 | eventfd_t val64 = 42; |
| 728 | EINTR_LOOP(ret, eventfd_write(efd, val64)); |
| 729 | EINTR_LOOP(ret, close(efd)); |
| 730 | } else |
| 731 | #endif |
| 732 | { |
| 733 | /* |
| 734 | * Usually, closing would be enough to make read(2) return and the child process |
| 735 | * continue. We need to write here: another thread could be calling forkfd at the |
| 736 | * same time, which means sync_pipe[1] might be open in another child process. |
| 737 | */ |
| 738 | EINTR_LOOP(ret, close(sync_pipe[0])); |
| 739 | EINTR_LOOP(ret, write(sync_pipe[1], "", 1)); |
| 740 | EINTR_LOOP(ret, close(sync_pipe[1])); |
| 741 | } |
| 742 | } |
| 743 | |
| 744 | return fd; |
| 745 | |
| 746 | err_close2: |
| 747 | #ifdef HAVE_EVENTFD |
| 748 | if (efd != -1) { |
| 749 | EINTR_LOOP(ret, close(efd)); |
| 750 | } else |
| 751 | #endif |
| 752 | { |
| 753 | EINTR_LOOP(ret, close(sync_pipe[0])); |
| 754 | EINTR_LOOP(ret, close(sync_pipe[1])); |
| 755 | } |
| 756 | err_close: |
| 757 | EINTR_LOOP(ret, close(death_pipe[0])); |
| 758 | EINTR_LOOP(ret, close(death_pipe[1])); |
| 759 | err_free: |
| 760 | /* free the info pointer */ |
| 761 | freeInfo(header, info); |
| 762 | return -1; |
| 763 | } |
| 764 | #endif // FORKFD_NO_FORKFD |
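|  | |
|  | /* Illustrative usage sketch (not part of the library): a caller might pair |
|  | * forkfd() with poll(2) (requires <poll.h>) and forkfd_wait4() roughly as |
|  | * follows; the child command ("/bin/true") and the omitted error handling |
|  | * are placeholders. |
|  | * |
|  | * pid_t pid; |
|  | * int ffd = forkfd(FFD_CLOEXEC, &pid); |
|  | * if (ffd == FFD_CHILD_PROCESS) { |
|  | * // child side: exec the real work, then make sure we exit |
|  | * execl("/bin/true", "true", (char *)NULL); |
|  | * _exit(127); |
|  | * } else if (ffd != -1) { |
|  | * // parent side: the descriptor becomes readable once the child exits |
|  | * struct pollfd pfd; |
|  | * pfd.fd = ffd; |
|  | * pfd.events = POLLIN; |
|  | * poll(&pfd, 1, -1); |
|  | * struct forkfd_info info; |
|  | * forkfd_wait4(ffd, &info, 0, NULL); // info.code/info.status say how it ended |
|  | * forkfd_close(ffd); |
|  | * } |
|  | */ |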
| 765 | |
| 766 | #if _POSIX_SPAWN > 0 && !defined(FORKFD_NO_SPAWNFD) |
| 767 | int spawnfd(int flags, pid_t *ppid, const char *path, const posix_spawn_file_actions_t *file_actions, |
| 768 | posix_spawnattr_t *attrp, char *const argv[], char *const envp[]) |
| 769 | { |
| 770 | Header *header; |
| 771 | ProcessInfo *info; |
| 772 | struct pipe_payload payload; |
| 773 | pid_t pid; |
| 774 | int death_pipe[2]; |
| 775 | int ret = -1; |
| 776 | /* we can only do work if we have a way to start the child in stopped mode; |
| 777 | * otherwise, we have a major race condition. */ |
| 778 | |
| 779 | assert(!system_has_forkfd()); |
| 780 | |
| 781 | (void) pthread_once(&forkfd_initialization, forkfd_initialize); |
| 782 | |
| 783 | info = allocateInfo(&header); |
| 784 | if (info == NULL) { |
| 785 | errno = ENOMEM; |
| 786 | goto out; |
| 787 | } |
| 788 | |
| 789 | /* create the pipe before we spawn */ |
| 790 | if (create_pipe(death_pipe, flags) == -1) |
| 791 | goto err_free; /* failed to create the pipes, pass errno */ |
| 792 | |
| 793 | /* start the process */ |
| 794 | if (flags & FFD_SPAWN_SEARCH_PATH) { |
| 795 | /* use posix_spawnp */ |
| 796 | if (posix_spawnp(&pid, path, file_actions, attrp, argv, envp) != 0) |
| 797 | goto err_close; |
| 798 | } else { |
| 799 | if (posix_spawn(&pid, path, file_actions, attrp, argv, envp) != 0) |
| 800 | goto err_close; |
| 801 | } |
| 802 | |
| 803 | if (ppid) |
| 804 | *ppid = pid; |
| 805 | |
| 806 | /* Store the child's PID in the info structure. |
| 807 | */ |
| 808 | info->deathPipe = death_pipe[1]; |
| 809 | ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE); |
| 810 | |
| 811 | /* check if the child has already exited */ |
| 812 | if (tryReaping(pid, &payload)) |
| 813 | notifyAndFreeInfo(header, info, &payload); |
| 814 | |
| 815 | ret = death_pipe[0]; |
| 816 | return ret; |
| 817 | |
| 818 | err_close: |
| 819 | EINTR_LOOP(ret, close(death_pipe[0])); |
| 820 | EINTR_LOOP(ret, close(death_pipe[1])); |
| 821 | |
| 822 | err_free: |
| 823 | /* free the info pointer */ |
| 824 | freeInfo(header, info); |
| 825 | |
| 826 | out: |
| 827 | return -1; |
| 828 | } |
| 829 | #endif // _POSIX_SPAWN && !FORKFD_NO_SPAWNFD |
| 830 | |
| 831 | int forkfd_wait4(int ffd, struct forkfd_info *info, int options, struct rusage *rusage) |
| 832 | { |
| 833 | struct pipe_payload payload; |
| 834 | int ret; |
| 835 | |
| 836 | if (system_has_forkfd()) { |
| 837 | /* if this is one of our pipes, not a procdesc/pidfd, we'll get an EBADF */ |
| 838 | ret = system_forkfd_wait(ffd, info, options, rusage); |
| 839 | if (disable_fork_fallback() || ret != -1 || errno != EBADF) |
| 840 | return ret; |
| 841 | } |
| 842 | |
| 843 | ret = read(ffd, &payload, sizeof(payload)); |
| 844 | if (ret == -1) |
| 845 | return ret; /* pass errno, probably EINTR, EBADF or EWOULDBLOCK */ |
| 846 | |
| 847 | assert(ret == sizeof(payload)); |
| 848 | if (info) |
| 849 | *info = payload.info; |
| 850 | if (rusage) |
| 851 | *rusage = payload.rusage; |
| 852 | |
| 853 | return 0; /* success */ |
| 854 | } |
| 855 | |
| 856 | |
| 857 | int forkfd_close(int ffd) |
| 858 | { |
| 859 | return close(ffd); |
| 860 | } |
| 861 | |
| 862 | #if defined(__FreeBSD__) && __FreeBSD__ >= 9 |
| 863 | # include "forkfd_freebsd.c" |
| 864 | #elif defined(__linux__) |
| 865 | # include "forkfd_linux.c" |
| 866 | #else |
| 867 | int system_has_forkfd() |
| 868 | { |
| 869 | return 0; |
| 870 | } |
| 871 | |
| 872 | int system_forkfd(int flags, pid_t *ppid, int *system) |
| 873 | { |
| 874 | (void)flags; |
| 875 | (void)ppid; |
| 876 | *system = 0; |
| 877 | return -1; |
| 878 | } |
| 879 | |
| 880 | int system_forkfd_wait(int ffd, struct forkfd_info *info, int options, struct rusage *rusage) |
| 881 | { |
| 882 | (void)ffd; |
| 883 | (void)info; |
| 884 | (void)options; |
| 885 | (void)rusage; |
| 886 | return -1; |
| 887 | } |
| 888 | #endif |
| 889 | |