| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2020 Intel Corporation. |
| 4 | ** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com |
| 5 | ** |
| 6 | ** Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | ** of this software and associated documentation files (the "Software"), to deal |
| 8 | ** in the Software without restriction, including without limitation the rights |
| 9 | ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | ** copies of the Software, and to permit persons to whom the Software is |
| 11 | ** furnished to do so, subject to the following conditions: |
| 12 | ** |
| 13 | ** The above copyright notice and this permission notice shall be included in |
| 14 | ** all copies or substantial portions of the Software. |
| 15 | ** |
| 16 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 22 | ** THE SOFTWARE. |
| 23 | ** |
| 24 | ****************************************************************************/ |
| 25 | |
| 26 | #ifndef _GNU_SOURCE |
| 27 | # define _GNU_SOURCE |
| 28 | #endif |
| 29 | |
| 30 | #include "forkfd.h" |
| 31 | |
| 32 | /* Macros fine-tuning the build: */ |
| 33 | //#define FORKFD_NO_FORKFD 1 /* disable the forkfd() function */ |
| 34 | //#define FORKFD_NO_SPAWNFD 1 /* disable the spawnfd() function */ |
| 35 | //#define FORKFD_DISABLE_FORK_FALLBACK 1 /* disable falling back to fork() from system_forkfd() */ |
| 36 | |
| 37 | #include <sys/types.h> |
| 38 | #if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) |
| 39 | # include <sys/param.h> |
| 40 | #endif |
| 41 | #include <sys/time.h> |
| 42 | #include <sys/resource.h> |
| 43 | #include <sys/wait.h> |
| 44 | #include <assert.h> |
| 45 | #include <errno.h> |
| 46 | #include <pthread.h> |
| 47 | #include <signal.h> |
| 48 | #include <stdlib.h> |
| 49 | #include <string.h> |
| 50 | #include <time.h> |
| 51 | #include <unistd.h> |
| 52 | |
| 53 | #ifdef __linux__ |
| 54 | # define HAVE_WAIT4 1 |
| 55 | # if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x208 && \ |
| 56 | (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201))) |
| 57 | # include <sys/eventfd.h> |
| 58 | # ifdef EFD_CLOEXEC |
| 59 | # define HAVE_EVENTFD 1 |
| 60 | # endif |
| 61 | # endif |
| 62 | # if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x209 && \ |
| 63 | (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201))) |
| 64 | # define HAVE_PIPE2 1 |
| 65 | # endif |
| 66 | #endif |
| 67 | |
| 68 | #if _POSIX_VERSION-0 >= 200809L || _XOPEN_VERSION-0 >= 500 |
| 69 | # define HAVE_WAITID 1 |
| 70 | #endif |
| 71 | #if !defined(WEXITED) || !defined(WNOWAIT) |
| 72 | # undef HAVE_WAITID |
| 73 | #endif |
| 74 | |
| 75 | #if (defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1300000) |
| 76 | # include <sys/eventfd.h> |
| 77 | # define HAVE_EVENTFD 1 |
| 78 | # define HAVE_WAITID 1 |
| 79 | #endif |
| 80 | #if (defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1000032) || \ |
| 81 | (defined(__OpenBSD__) && OpenBSD >= 201505) || \ |
| 82 | (defined(__NetBSD__) && __NetBSD_Version__ >= 600000000) |
| 83 | # define HAVE_PIPE2 1 |
| 84 | #endif |
| 85 | #if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) || \ |
| 86 | defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__) |
| 87 | # define HAVE_WAIT4 1 |
| 88 | #endif |
| 89 | |
| 90 | #if defined(__APPLE__) |
| 91 | /* Up until OS X 10.7, waitid(P_ALL, ...) will return success, but will not |
| 92 | * fill in the details of the dead child. That means waitid is not useful to us. |
 * Therefore, we only enable waitid() support if we're targeting OS X 10.8 or
| 94 | * later. |
| 95 | */ |
| 96 | # include <Availability.h> |
| 97 | # include <AvailabilityMacros.h> |
| 98 | # if MAC_OS_X_VERSION_MIN_REQUIRED <= 1070 |
| 99 | # define HAVE_BROKEN_WAITID 1 |
| 100 | # endif |
| 101 | #endif |
| 102 | |
| 103 | #include "forkfd_atomic.h" |
| 104 | |
/* Platform-specific backend hooks; defined in a separate translation unit
 * (not visible in this file). 'system' is set to non-zero when the native
 * implementation handled the request. */
static int system_has_forkfd(void);
static int system_forkfd(int flags, pid_t *ppid, int *system);
static int system_vforkfd(int flags, pid_t *ppid, int (*)(void *), void *, int *system);
static int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdwoptions, struct rusage *rusage);

/* Returns non-zero when the fork()-based fallback must not be used.
 * Only meaningful when FORKFD_DISABLE_FORK_FALLBACK is defined at build time. */
static int disable_fork_fallback(void)
{
#ifdef FORKFD_DISABLE_FORK_FALLBACK
    /* if there's no system forkfd, we have to use the fallback */
    return system_has_forkfd();
#else
    return 0;
#endif
}
| 119 | |
/* Capacity of the statically-allocated first section of child entries. */
#define CHILDREN_IN_SMALL_ARRAY 16
/* Capacity of each heap-allocated follow-up section. */
#define CHILDREN_IN_BIG_ARRAY 256
/* Element count of a true array (do not use on pointers). */
#define sizeofarray(array) (sizeof(array)/sizeof(array[0]))
/* Retry a system call while it fails with EINTR. */
#define EINTR_LOOP(ret, call) \
    do { \
        ret = call; \
    } while (ret == -1 && errno == EINTR)
| 127 | |
/* The binary payload written to a child's death pipe when it exits:
 * exit code/status plus resource usage (see notifyAndFreeInfo). */
struct pipe_payload
{
    struct forkfd_info info;
    struct rusage rusage;
};
| 133 | |
/* One tracked child process.
 * pid encodes the entry state: 0 = free; -1 = transiently locked while being
 * allocated or reaped; > 0 = PID of a live child. */
typedef struct process_info
{
    ffd_atomic_int pid;
    /* write end of the pipe the forkfd user reads exit info from */
    int deathPipe;
} ProcessInfo;
| 139 | |
| 140 | struct BigArray; |
| 141 | typedef struct |
| 142 | { |
| 143 | ffd_atomic_pointer(struct BigArray) ; |
| 144 | ffd_atomic_int ; |
| 145 | } ; |
| 146 | |
| 147 | typedef struct BigArray |
| 148 | { |
| 149 | Header ; |
| 150 | ProcessInfo entries[CHILDREN_IN_BIG_ARRAY]; |
| 151 | } BigArray; |
| 152 | |
| 153 | typedef struct SmallArray |
| 154 | { |
| 155 | Header ; |
| 156 | ProcessInfo entries[CHILDREN_IN_SMALL_ARRAY]; |
| 157 | } SmallArray; |
| 158 | static SmallArray children; |
| 159 | |
/* Previously-installed SIGCHLD disposition; chained to from our handler. */
static struct sigaction old_sigaction;
static pthread_once_t forkfd_initialization = PTHREAD_ONCE_INIT;
/* 0 = not initialized (or shut down); 1 = handler installed and operating. */
static ffd_atomic_int forkfd_status = FFD_ATOMIC_INIT(0);

/* On Apple targets this is probed at runtime (see forkfd_initialize);
 * elsewhere waitid(2) is assumed functional when available. */
#ifdef HAVE_BROKEN_WAITID
static int waitid_works = 0;
#else
static const int waitid_works = 1;
#endif
| 169 | |
| 170 | static ProcessInfo *tryAllocateInSection(Header *, ProcessInfo entries[], int maxCount) |
| 171 | { |
| 172 | /* we use ACQUIRE here because the signal handler might have released the PID */ |
| 173 | int busyCount = ffd_atomic_add_fetch(&header->busyCount, 1, FFD_ATOMIC_ACQUIRE); |
| 174 | if (busyCount <= maxCount) { |
| 175 | /* there's an available entry in this section, find it and take it */ |
| 176 | int i; |
| 177 | for (i = 0; i < maxCount; ++i) { |
| 178 | /* if the PID is 0, it's free; mark it as used by swapping it with -1 */ |
| 179 | int expected_pid = 0; |
| 180 | if (ffd_atomic_compare_exchange(&entries[i].pid, &expected_pid, |
| 181 | -1, FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED)) |
| 182 | return &entries[i]; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | /* there isn't an available entry, undo our increment */ |
| 187 | (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELAXED); |
| 188 | return NULL; |
| 189 | } |
| 190 | |
| 191 | static ProcessInfo *allocateInfo(Header **) |
| 192 | { |
| 193 | Header * = &children.header; |
| 194 | |
| 195 | /* try to find an available entry in the small array first */ |
| 196 | ProcessInfo *info = |
| 197 | tryAllocateInSection(header: currentHeader, entries: children.entries, sizeofarray(children.entries)); |
| 198 | |
| 199 | /* go on to the next arrays */ |
| 200 | while (info == NULL) { |
| 201 | BigArray *array = ffd_atomic_load(¤tHeader->nextArray, FFD_ATOMIC_ACQUIRE); |
| 202 | if (array == NULL) { |
| 203 | /* allocate an array and try to use it */ |
| 204 | BigArray *allocatedArray = (BigArray *)calloc(nmemb: 1, size: sizeof(BigArray)); |
| 205 | if (allocatedArray == NULL) |
| 206 | return NULL; |
| 207 | |
| 208 | if (ffd_atomic_compare_exchange(¤tHeader->nextArray, &array, allocatedArray, |
| 209 | FFD_ATOMIC_RELEASE, FFD_ATOMIC_ACQUIRE)) { |
| 210 | /* success */ |
| 211 | array = allocatedArray; |
| 212 | } else { |
| 213 | /* failed, the atomic updated 'array' */ |
| 214 | free(ptr: allocatedArray); |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | currentHeader = &array->header; |
| 219 | info = tryAllocateInSection(header: currentHeader, entries: array->entries, sizeofarray(array->entries)); |
| 220 | } |
| 221 | |
| 222 | *header = currentHeader; |
| 223 | return info; |
| 224 | } |
| 225 | |
| 226 | #ifdef HAVE_WAITID |
| 227 | static int isChildReady(pid_t pid, siginfo_t *info) |
| 228 | { |
| 229 | info->si_pid = 0; |
| 230 | return waitid(idtype: P_PID, id: pid, infop: info, WEXITED | WNOHANG | WNOWAIT) == 0 && info->si_pid == pid; |
| 231 | } |
| 232 | |
| 233 | #ifdef __GNUC__ |
| 234 | __attribute__((unused)) |
| 235 | #endif |
| 236 | static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions) |
| 237 | { |
| 238 | int woptions = WEXITED; |
| 239 | if (ffdoptions & FFDW_NOWAIT) |
| 240 | woptions |= WNOWAIT; |
| 241 | if (ffdoptions & FFDW_NOHANG) |
| 242 | woptions |= WNOHANG; |
| 243 | return woptions; |
| 244 | } |
| 245 | #endif |
| 246 | |
| 247 | static void convertStatusToForkfdInfo(int status, struct forkfd_info *info) |
| 248 | { |
| 249 | if (WIFEXITED(status)) { |
| 250 | info->code = CLD_EXITED; |
| 251 | info->status = WEXITSTATUS(status); |
| 252 | } else if (WIFSIGNALED(status)) { |
| 253 | info->code = CLD_KILLED; |
| 254 | # ifdef WCOREDUMP |
| 255 | if (WCOREDUMP(status)) |
| 256 | info->code = CLD_DUMPED; |
| 257 | # endif |
| 258 | info->status = WTERMSIG(status); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | static int tryReaping(pid_t pid, struct pipe_payload *payload) |
| 263 | { |
| 264 | /* reap the child */ |
| 265 | #if defined(HAVE_WAIT4) |
| 266 | int status; |
| 267 | if (wait4(pid: pid, stat_loc: &status, WNOHANG, usage: &payload->rusage) <= 0) |
| 268 | return 0; |
| 269 | convertStatusToForkfdInfo(status, info: &payload->info); |
| 270 | #else |
| 271 | # if defined(HAVE_WAITID) |
| 272 | if (waitid_works) { |
| 273 | /* we have waitid(2), which gets us some payload values on some systems */ |
| 274 | siginfo_t info; |
| 275 | info.si_pid = 0; |
| 276 | int ret = waitid(P_PID, pid, &info, WEXITED | WNOHANG) == 0 && info.si_pid == pid; |
| 277 | if (!ret) |
| 278 | return ret; |
| 279 | |
| 280 | payload->info.code = info.si_code; |
| 281 | payload->info.status = info.si_status; |
| 282 | # ifdef __linux__ |
| 283 | payload->rusage.ru_utime.tv_sec = info.si_utime / CLOCKS_PER_SEC; |
| 284 | payload->rusage.ru_utime.tv_usec = info.si_utime % CLOCKS_PER_SEC; |
| 285 | payload->rusage.ru_stime.tv_sec = info.si_stime / CLOCKS_PER_SEC; |
| 286 | payload->rusage.ru_stime.tv_usec = info.si_stime % CLOCKS_PER_SEC; |
| 287 | # endif |
| 288 | return 1; |
| 289 | } |
| 290 | # endif // HAVE_WAITID |
| 291 | int status; |
| 292 | if (waitpid(pid, &status, WNOHANG) <= 0) |
| 293 | return 0; // child did not change state |
| 294 | convertStatusToForkfdInfo(status, &payload->info); |
| 295 | #endif // !HAVE_WAIT4 |
| 296 | |
| 297 | return 1; |
| 298 | } |
| 299 | |
| 300 | static void freeInfo(Header *, ProcessInfo *entry) |
| 301 | { |
| 302 | entry->deathPipe = -1; |
| 303 | ffd_atomic_store(&entry->pid, 0, FFD_ATOMIC_RELEASE); |
| 304 | |
| 305 | (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELEASE); |
| 306 | assert(header->busyCount >= 0); |
| 307 | } |
| 308 | |
| 309 | static void notifyAndFreeInfo(Header *, ProcessInfo *entry, |
| 310 | const struct pipe_payload *payload) |
| 311 | { |
| 312 | ssize_t ret; |
| 313 | EINTR_LOOP(ret, write(entry->deathPipe, payload, sizeof(*payload))); |
| 314 | EINTR_LOOP(ret, close(entry->deathPipe)); |
| 315 | |
| 316 | freeInfo(header, entry); |
| 317 | } |
| 318 | |
static void reapChildProcesses();
/* SIGCHLD handler: chains to any previously-installed handler first, then
 * reaps our own children if forkfd is operational. Everything called from
 * here must be async-signal-safe. */
static void sigchld_handler(int signum, siginfo_t *handler_info, void *handler_context)
{
    /*
     * This is a signal handler, so we need to be careful about which functions
     * we can call. See the full, official listing in the POSIX.1-2008
     * specification at:
     * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
     *
     * The handler_info and handler_context parameters may not be valid, if
     * we're a chained handler from another handler that did not use
     * SA_SIGINFO. Therefore, we must obtain the siginfo ourselves directly by
     * calling waitid.
     *
     * But we pass them anyway. Let's call the chained handler first, while
     * those two arguments have a chance of being correct.
     */
    if (old_sigaction.sa_handler != SIG_IGN && old_sigaction.sa_handler != SIG_DFL) {
        if (old_sigaction.sa_flags & SA_SIGINFO)
            old_sigaction.sa_sigaction(signum, handler_info, handler_context);
        else
            old_sigaction.sa_handler(signum);
    }

    if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 1) {
        /* a handler must not clobber the errno seen by interrupted code */
        int saved_errno = errno;
        reapChildProcesses();
        errno = saved_errno;
    }
}
| 349 | |
| 350 | static inline void reapChildProcesses() |
| 351 | { |
| 352 | /* is this one of our children? */ |
| 353 | BigArray *array; |
| 354 | siginfo_t info; |
| 355 | struct pipe_payload payload; |
| 356 | int i; |
| 357 | |
| 358 | memset(s: &info, c: 0, n: sizeof info); |
| 359 | memset(s: &payload, c: 0, n: sizeof payload); |
| 360 | |
| 361 | #ifdef HAVE_WAITID |
| 362 | if (waitid_works) { |
| 363 | /* be optimistic: try to see if we can get the child that exited */ |
| 364 | search_next_child: |
| 365 | /* waitid returns -1 ECHILD if there are no further children at all; |
| 366 | * it returns 0 and sets si_pid to 0 if there are children but they are not ready |
| 367 | * to be waited (we're passing WNOHANG). We should not get EINTR because |
| 368 | * we're passing WNOHANG and we should definitely not get EINVAL or anything else. |
| 369 | * That means we can actually ignore the return code and only inspect si_pid. |
| 370 | */ |
| 371 | info.si_pid = 0; |
| 372 | waitid(idtype: P_ALL, id: 0, infop: &info, WNOHANG | WNOWAIT | WEXITED); |
| 373 | if (info.si_pid == 0) { |
| 374 | /* there are no further un-waited-for children, so we can just exit. |
| 375 | */ |
| 376 | return; |
| 377 | } |
| 378 | |
| 379 | for (i = 0; i < (int)sizeofarray(children.entries); ++i) { |
| 380 | /* acquire the child first: swap the PID with -1 to indicate it's busy */ |
| 381 | int pid = info.si_pid; |
| 382 | if (ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1, |
| 383 | FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) { |
| 384 | /* this is our child, send notification and free up this entry */ |
| 385 | /* ### FIXME: what if tryReaping returns false? */ |
| 386 | if (tryReaping(pid, payload: &payload)) |
| 387 | notifyAndFreeInfo(header: &children.header, entry: &children.entries[i], payload: &payload); |
| 388 | goto search_next_child; |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | /* try the arrays */ |
| 393 | array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 394 | while (array != NULL) { |
| 395 | for (i = 0; i < (int)sizeofarray(array->entries); ++i) { |
| 396 | int pid = info.si_pid; |
| 397 | if (ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1, |
| 398 | FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) { |
| 399 | /* this is our child, send notification and free up this entry */ |
| 400 | /* ### FIXME: what if tryReaping returns false? */ |
| 401 | if (tryReaping(pid, payload: &payload)) |
| 402 | notifyAndFreeInfo(header: &array->header, entry: &array->entries[i], payload: &payload); |
| 403 | goto search_next_child; |
| 404 | } |
| 405 | } |
| 406 | |
| 407 | array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 408 | } |
| 409 | |
| 410 | /* if we got here, we couldn't find this child in our list. That means this child |
| 411 | * belongs to one of the chained SIGCHLD handlers. However, there might be another |
| 412 | * child that exited and does belong to us, so we need to check each one individually. |
| 413 | */ |
| 414 | } |
| 415 | #endif |
| 416 | |
| 417 | for (i = 0; i < (int)sizeofarray(children.entries); ++i) { |
| 418 | int pid = ffd_atomic_load(&children.entries[i].pid, FFD_ATOMIC_ACQUIRE); |
| 419 | if (pid <= 0) |
| 420 | continue; |
| 421 | #ifdef HAVE_WAITID |
| 422 | if (waitid_works) { |
| 423 | /* The child might have been reaped by the block above in another thread, |
| 424 | * so first check if it's ready and, if it is, lock it */ |
| 425 | if (!isChildReady(pid, info: &info) || |
| 426 | !ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1, |
| 427 | FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED)) |
| 428 | continue; |
| 429 | } |
| 430 | #endif |
| 431 | if (tryReaping(pid, payload: &payload)) { |
| 432 | /* this is our child, send notification and free up this entry */ |
| 433 | notifyAndFreeInfo(header: &children.header, entry: &children.entries[i], payload: &payload); |
| 434 | } |
| 435 | } |
| 436 | |
| 437 | /* try the arrays */ |
| 438 | array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 439 | while (array != NULL) { |
| 440 | for (i = 0; i < (int)sizeofarray(array->entries); ++i) { |
| 441 | int pid = ffd_atomic_load(&array->entries[i].pid, FFD_ATOMIC_ACQUIRE); |
| 442 | if (pid <= 0) |
| 443 | continue; |
| 444 | #ifdef HAVE_WAITID |
| 445 | if (waitid_works) { |
| 446 | /* The child might have been reaped by the block above in another thread, |
| 447 | * so first check if it's ready and, if it is, lock it */ |
| 448 | if (!isChildReady(pid, info: &info) || |
| 449 | !ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1, |
| 450 | FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED)) |
| 451 | continue; |
| 452 | } |
| 453 | #endif |
| 454 | if (tryReaping(pid, payload: &payload)) { |
| 455 | /* this is our child, send notification and free up this entry */ |
| 456 | notifyAndFreeInfo(header: &array->header, entry: &array->entries[i], payload: &payload); |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | static void ignore_sigpipe() |
| 465 | { |
| 466 | #ifdef O_NOSIGPIPE |
| 467 | static ffd_atomic_int done = FFD_ATOMIC_INIT(0); |
| 468 | if (ffd_atomic_load(&done, FFD_ATOMIC_RELAXED)) |
| 469 | return; |
| 470 | #endif |
| 471 | |
| 472 | struct sigaction action; |
| 473 | memset(s: &action, c: 0, n: sizeof action); |
| 474 | sigemptyset(set: &action.sa_mask); |
| 475 | action.sa_handler = SIG_IGN; |
| 476 | action.sa_flags = 0; |
| 477 | sigaction(SIGPIPE, act: &action, NULL); |
| 478 | |
| 479 | #ifdef O_NOSIGPIPE |
| 480 | ffd_atomic_store(&done, 1, FFD_ATOMIC_RELAXED); |
| 481 | #endif |
| 482 | } |
| 483 | |
| 484 | #if defined(__GNUC__) && (!defined(__FreeBSD__) || __FreeBSD__ < 10) |
| 485 | __attribute((destructor, unused)) static void cleanup(); |
| 486 | #endif |
| 487 | |
| 488 | static void cleanup() |
| 489 | { |
| 490 | BigArray *array; |
| 491 | /* This function is not thread-safe! |
| 492 | * It must only be called when the process is shutting down. |
| 493 | * At shutdown, we expect no one to be calling forkfd(), so we don't |
| 494 | * need to be thread-safe with what is done there. |
| 495 | * |
| 496 | * But SIGCHLD might be delivered to any thread, including this one. |
| 497 | * There's no way to prevent that. The correct solution would be to |
| 498 | * cooperatively delete. We don't do that. |
| 499 | */ |
| 500 | if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 0) |
| 501 | return; |
| 502 | |
| 503 | /* notify the handler that we're no longer in operation */ |
| 504 | ffd_atomic_store(&forkfd_status, 0, FFD_ATOMIC_RELAXED); |
| 505 | |
| 506 | /* free any arrays we might have */ |
| 507 | array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 508 | while (array != NULL) { |
| 509 | BigArray *next = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE); |
| 510 | free(ptr: array); |
| 511 | array = next; |
| 512 | } |
| 513 | } |
| 514 | |
| 515 | static void forkfd_initialize() |
| 516 | { |
| 517 | #if defined(HAVE_BROKEN_WAITID) |
| 518 | pid_t pid = fork(); |
| 519 | if (pid == 0) { |
| 520 | _exit(0); |
| 521 | } else if (pid > 0) { |
| 522 | siginfo_t info; |
| 523 | waitid(P_ALL, 0, &info, WNOWAIT | WEXITED); |
| 524 | waitid_works = (info.si_pid != 0); |
| 525 | info.si_pid = 0; |
| 526 | |
| 527 | // now really reap the child |
| 528 | waitid(P_PID, pid, &info, WEXITED); |
| 529 | waitid_works = waitid_works && (info.si_pid != 0); |
| 530 | } |
| 531 | #endif |
| 532 | |
| 533 | /* install our signal handler */ |
| 534 | struct sigaction action; |
| 535 | memset(s: &action, c: 0, n: sizeof action); |
| 536 | sigemptyset(set: &action.sa_mask); |
| 537 | action.sa_flags = SA_NOCLDSTOP | SA_SIGINFO; |
| 538 | action.sa_sigaction = sigchld_handler; |
| 539 | |
| 540 | /* ### RACE CONDITION |
| 541 | * The sigaction function does a memcpy from an internal buffer |
| 542 | * to old_sigaction, which we use in the SIGCHLD handler. If a |
| 543 | * SIGCHLD is delivered before or during that memcpy, the handler will |
| 544 | * see an inconsistent state. |
| 545 | * |
| 546 | * There is no solution. pthread_sigmask doesn't work here because the |
| 547 | * signal could be delivered to another thread. |
| 548 | */ |
| 549 | sigaction(SIGCHLD, act: &action, oact: &old_sigaction); |
| 550 | |
| 551 | #ifndef O_NOSIGPIPE |
| 552 | /* disable SIGPIPE too */ |
| 553 | ignore_sigpipe(); |
| 554 | #endif |
| 555 | |
| 556 | #ifdef __GNUC__ |
| 557 | (void) cleanup; /* suppress unused static function warning */ |
| 558 | #else |
| 559 | atexit(cleanup); |
| 560 | #endif |
| 561 | |
| 562 | ffd_atomic_store(&forkfd_status, 1, FFD_ATOMIC_RELAXED); |
| 563 | } |
| 564 | |
| 565 | static int create_pipe(int filedes[], int flags) |
| 566 | { |
| 567 | int ret = -1; |
| 568 | #ifdef HAVE_PIPE2 |
| 569 | /* use pipe2(2) whenever possible, since it can thread-safely create a |
| 570 | * cloexec pair of pipes. Without it, we have a race condition setting |
| 571 | * FD_CLOEXEC |
| 572 | */ |
| 573 | |
| 574 | # ifdef O_NOSIGPIPE |
| 575 | /* try first with O_NOSIGPIPE */ |
| 576 | ret = pipe2(filedes, O_CLOEXEC | O_NOSIGPIPE); |
| 577 | if (ret == -1) { |
| 578 | /* O_NOSIGPIPE not supported, ignore SIGPIPE */ |
| 579 | ignore_sigpipe(); |
| 580 | } |
| 581 | # endif |
| 582 | if (ret == -1) |
| 583 | ret = pipe2(pipedes: filedes, O_CLOEXEC); |
| 584 | if (ret == -1) |
| 585 | return ret; |
| 586 | |
| 587 | if ((flags & FFD_CLOEXEC) == 0) |
| 588 | fcntl(fd: filedes[0], F_SETFD, 0); |
| 589 | #else |
| 590 | ret = pipe(filedes); |
| 591 | if (ret == -1) |
| 592 | return ret; |
| 593 | |
| 594 | fcntl(filedes[1], F_SETFD, FD_CLOEXEC); |
| 595 | if (flags & FFD_CLOEXEC) |
| 596 | fcntl(filedes[0], F_SETFD, FD_CLOEXEC); |
| 597 | #endif |
| 598 | if (flags & FFD_NONBLOCK) |
| 599 | fcntl(fd: filedes[0], F_SETFL, fcntl(fd: filedes[0], F_GETFL) | O_NONBLOCK); |
| 600 | return ret; |
| 601 | } |
| 602 | |
| 603 | #ifndef FORKFD_NO_FORKFD |
| 604 | static int forkfd_fork_fallback(int flags, pid_t *ppid) |
| 605 | { |
| 606 | Header *; |
| 607 | ProcessInfo *info; |
| 608 | pid_t pid; |
| 609 | int fd = -1; |
| 610 | int death_pipe[2]; |
| 611 | int sync_pipe[2]; |
| 612 | int ret; |
| 613 | int efd = -1; |
| 614 | |
| 615 | (void) pthread_once(once_control: &forkfd_initialization, init_routine: forkfd_initialize); |
| 616 | |
| 617 | info = allocateInfo(header: &header); |
| 618 | if (info == NULL) { |
| 619 | errno = ENOMEM; |
| 620 | return -1; |
| 621 | } |
| 622 | |
| 623 | /* create the pipes before we fork */ |
| 624 | if (create_pipe(filedes: death_pipe, flags) == -1) |
| 625 | goto err_free; /* failed to create the pipes, pass errno */ |
| 626 | |
| 627 | #ifdef HAVE_EVENTFD |
| 628 | /* try using an eventfd, which consumes less resources */ |
| 629 | efd = eventfd(count: 0, EFD_CLOEXEC); |
| 630 | #endif |
| 631 | if (efd == -1) { |
| 632 | /* try a pipe */ |
| 633 | if (create_pipe(filedes: sync_pipe, FFD_CLOEXEC) == -1) { |
| 634 | /* failed both at eventfd and pipe; fail and pass errno */ |
| 635 | goto err_close; |
| 636 | } |
| 637 | } |
| 638 | |
| 639 | /* now fork */ |
| 640 | pid = fork(); |
| 641 | if (pid == -1) |
| 642 | goto err_close2; /* failed to fork, pass errno */ |
| 643 | if (ppid) |
| 644 | *ppid = pid; |
| 645 | |
| 646 | /* |
| 647 | * We need to store the child's PID in the info structure, so |
| 648 | * the SIGCHLD handler knows that this child is present and it |
| 649 | * knows the writing end of the pipe to pass information on. |
| 650 | * However, the child process could exit before we stored the |
| 651 | * information (or the handler could run for other children exiting). |
| 652 | * We prevent that from happening by blocking the child process in |
| 653 | * a read(2) until we're finished storing the information. |
| 654 | */ |
| 655 | if (pid == 0) { |
| 656 | /* this is the child process */ |
| 657 | /* first, wait for the all clear */ |
| 658 | if (efd != -1) { |
| 659 | #ifdef HAVE_EVENTFD |
| 660 | eventfd_t val64; |
| 661 | EINTR_LOOP(ret, eventfd_read(efd, &val64)); |
| 662 | EINTR_LOOP(ret, close(efd)); |
| 663 | #endif |
| 664 | } else { |
| 665 | char c; |
| 666 | EINTR_LOOP(ret, close(sync_pipe[1])); |
| 667 | EINTR_LOOP(ret, read(sync_pipe[0], &c, sizeof c)); |
| 668 | EINTR_LOOP(ret, close(sync_pipe[0])); |
| 669 | } |
| 670 | |
| 671 | /* now close the pipes and return to the caller */ |
| 672 | EINTR_LOOP(ret, close(death_pipe[0])); |
| 673 | EINTR_LOOP(ret, close(death_pipe[1])); |
| 674 | fd = FFD_CHILD_PROCESS; |
| 675 | } else { |
| 676 | /* parent process */ |
| 677 | info->deathPipe = death_pipe[1]; |
| 678 | fd = death_pipe[0]; |
| 679 | ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE); |
| 680 | |
| 681 | /* release the child */ |
| 682 | #ifdef HAVE_EVENTFD |
| 683 | if (efd != -1) { |
| 684 | eventfd_t val64 = 42; |
| 685 | EINTR_LOOP(ret, eventfd_write(efd, val64)); |
| 686 | EINTR_LOOP(ret, close(efd)); |
| 687 | } else |
| 688 | #endif |
| 689 | { |
| 690 | /* |
| 691 | * Usually, closing would be enough to make read(2) return and the child process |
| 692 | * continue. We need to write here: another thread could be calling forkfd at the |
| 693 | * same time, which means auxpipe[1] might be open in another child process. |
| 694 | */ |
| 695 | EINTR_LOOP(ret, close(sync_pipe[0])); |
| 696 | EINTR_LOOP(ret, write(sync_pipe[1], "" , 1)); |
| 697 | EINTR_LOOP(ret, close(sync_pipe[1])); |
| 698 | } |
| 699 | } |
| 700 | |
| 701 | return fd; |
| 702 | |
| 703 | err_close2: |
| 704 | #ifdef HAVE_EVENTFD |
| 705 | if (efd != -1) { |
| 706 | EINTR_LOOP(ret, close(efd)); |
| 707 | } else |
| 708 | #endif |
| 709 | { |
| 710 | EINTR_LOOP(ret, close(sync_pipe[0])); |
| 711 | EINTR_LOOP(ret, close(sync_pipe[1])); |
| 712 | } |
| 713 | err_close: |
| 714 | EINTR_LOOP(ret, close(death_pipe[0])); |
| 715 | EINTR_LOOP(ret, close(death_pipe[1])); |
| 716 | err_free: |
| 717 | /* free the info pointer */ |
| 718 | freeInfo(header, entry: info); |
| 719 | return -1; |
| 720 | } |
| 721 | |
| 722 | /** |
| 723 | * @brief forkfd returns a file descriptor representing a child process |
| 724 | * @return a file descriptor, or -1 in case of failure |
| 725 | * |
| 726 | * forkfd() creates a file descriptor that can be used to be notified of when a |
| 727 | * child process exits. This file descriptor can be monitored using select(2), |
| 728 | * poll(2) or similar mechanisms. |
| 729 | * |
| 730 | * The @a flags parameter can contain the following values ORed to change the |
| 731 | * behaviour of forkfd(): |
| 732 | * |
| 733 | * @li @c FFD_NONBLOCK Set the O_NONBLOCK file status flag on the new open file |
| 734 | * descriptor. Using this flag saves extra calls to fnctl(2) to achieve the same |
| 735 | * result. |
| 736 | * |
| 737 | * @li @c FFD_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file |
| 738 | * descriptor. You probably want to set this flag, since forkfd() does not work |
| 739 | * if the original parent process dies. |
| 740 | * |
| 741 | * @li @c FFD_USE_FORK Tell forkfd() to actually call fork() instead of a |
| 742 | * different system implementation that may be available. On systems where a |
| 743 | * different implementation is available, its behavior may differ from that of |
| 744 | * fork(), such as not calling the functions registered with pthread_atfork(). |
| 745 | * If that's necessary, pass this flag. |
| 746 | * |
| 747 | * The file descriptor returned by forkfd() supports the following operations: |
| 748 | * |
| 749 | * @li read(2) When the child process exits, then the buffer supplied to |
| 750 | * read(2) is used to return information about the status of the child in the |
| 751 | * form of one @c siginfo_t structure. The buffer must be at least |
| 752 | * sizeof(siginfo_t) bytes. The return value of read(2) is the total number of |
| 753 | * bytes read. |
| 754 | * |
| 755 | * @li poll(2), select(2) (and similar) The file descriptor is readable (the |
| 756 | * select(2) readfds argument; the poll(2) POLLIN flag) if the child has exited |
| 757 | * or signalled via SIGCHLD. |
| 758 | * |
| 759 | * @li close(2) When the file descriptor is no longer required it should be closed. |
| 760 | */ |
| 761 | int forkfd(int flags, pid_t *ppid) |
| 762 | { |
| 763 | int fd; |
| 764 | if (disable_fork_fallback()) |
| 765 | flags &= ~FFD_USE_FORK; |
| 766 | |
| 767 | if ((flags & FFD_USE_FORK) == 0) { |
| 768 | int system_forkfd_works; |
| 769 | fd = system_forkfd(flags, ppid, system: &system_forkfd_works); |
| 770 | if (system_forkfd_works || disable_fork_fallback()) |
| 771 | return fd; |
| 772 | } |
| 773 | |
| 774 | return forkfd_fork_fallback(flags, ppid); |
| 775 | } |
| 776 | |
| 777 | /** |
| 778 | * @brief vforkfd returns a file descriptor representing a child process |
| 779 | * @return a file descriptor, or -1 in case of failure |
| 780 | * |
| 781 | * vforkfd() operates in the same way as forkfd() and the @a flags and @a ppid |
| 782 | * arguments are the same. See the forkfd() documentation for details on the |
| 783 | * possible values and information on the returned file descriptor. |
| 784 | * |
| 785 | * This function does not return @c FFD_CHILD_PROCESS. Instead, the function @a |
| 786 | * childFn is called in the child process with the @a token parameter as |
| 787 | * argument. If that function returns, its return value will be passed to |
| 788 | * _exit(2). |
| 789 | * |
| 790 | * This function differs from forkfd() the same way that vfork() differs from |
 * fork(): the parent process may be suspended while the child has not yet
| 792 | * called _exit(2) or execve(2). Additionally, on some systems, the child |
| 793 | * process may share memory with the parent process the same way an auxiliary |
| 794 | * thread would, so extreme care should be employed on what functions the child |
| 795 | * process uses before termination. |
| 796 | * |
| 797 | * The @c FFD_USE_FORK flag retains its behavior as described in the forkfd() |
| 798 | * documentation, including that of actually using fork(2) and no other |
| 799 | * implementation. |
| 800 | * |
| 801 | * Currently, only on Linux will this function have any behavior different from |
| 802 | * forkfd(). In all other systems, it is equivalent to the following code: |
| 803 | * |
| 804 | * @code |
| 805 | * int ffd = forkfd(flags, &pid); |
| 806 | * if (ffd == FFD_CHILD_PROCESS) |
| 807 | * _exit(childFn(token)); |
| 808 | * @endcode |
| 809 | */ |
| 810 | int vforkfd(int flags, pid_t *ppid, int (*childFn)(void *), void *token) |
| 811 | { |
| 812 | int fd; |
| 813 | if ((flags & FFD_USE_FORK) == 0) { |
| 814 | int system_forkfd_works; |
| 815 | fd = system_vforkfd(flags, ppid, childFn, token, system: &system_forkfd_works); |
| 816 | if (system_forkfd_works || disable_fork_fallback()) |
| 817 | return fd; |
| 818 | } |
| 819 | |
| 820 | fd = forkfd_fork_fallback(flags, ppid); |
| 821 | if (fd == FFD_CHILD_PROCESS) { |
| 822 | /* child process */ |
| 823 | _exit(status: childFn(token)); |
| 824 | } |
| 825 | return fd; |
| 826 | } |
| 827 | #endif // FORKFD_NO_FORKFD |
| 828 | |
| 829 | #if _POSIX_SPAWN > 0 && !defined(FORKFD_NO_SPAWNFD) |
/**
 * @brief spawnfd starts a process via posix_spawn(3) and returns a file
 * descriptor that becomes readable when that process exits, analogous to
 * forkfd().
 *
 * @a flags may include FFD_SPAWN_SEARCH_PATH to use posix_spawnp(3) (PATH
 * search) instead of posix_spawn(3). The remaining parameters are passed
 * through to posix_spawn(p) unchanged. On success, the child's PID is
 * stored in @a *ppid (if non-null) and the read end of the internal death
 * pipe is returned; on failure, -1 is returned and errno is set.
 *
 * NOTE(review): this path is only used when no native forkfd facility
 * exists (see the assert below); the SIGCHLD machinery installed by
 * forkfd_initialize() writes the exit payload into the pipe.
 */
int spawnfd(int flags, pid_t *ppid, const char *path, const posix_spawn_file_actions_t *file_actions,
            posix_spawnattr_t *attrp, char *const argv[], char *const envp[])
{
    Header *header;
    ProcessInfo *info;
    struct pipe_payload payload;
    pid_t pid;
    int death_pipe[2];
    int ret = -1;
    /* we can only do work if we have a way to start the child in stopped mode;
     * otherwise, we have a major race condition. */

    /* this emulation must not be mixed with a native forkfd implementation */
    assert(!system_has_forkfd());

    /* install the SIGCHLD handler / shared state exactly once */
    (void) pthread_once(&forkfd_initialization, forkfd_initialize);

    info = allocateInfo(&header);
    if (info == NULL) {
        errno = ENOMEM;
        goto out;
    }

    /* create the pipe before we spawn, so the SIGCHLD handler can already
     * report a child that dies immediately */
    if (create_pipe(death_pipe, flags) == -1)
        goto err_free; /* failed to create the pipes, pass errno */

    /* start the process */
    if (flags & FFD_SPAWN_SEARCH_PATH) {
        /* use posix_spawnp */
        if (posix_spawnp(&pid, path, file_actions, attrp, argv, envp) != 0)
            goto err_close;
    } else {
        if (posix_spawn(&pid, path, file_actions, attrp, argv, envp) != 0)
            goto err_close;
    }

    if (ppid)
        *ppid = pid;

    /* Store the child's PID in the info structure.
     * The release store publishes the entry to the SIGCHLD handler.
     */
    info->deathPipe = death_pipe[1];
    ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);

    /* check if the child has already exited (it may have died before the
     * PID was published above, in which case the handler missed it) */
    if (tryReaping(pid, &payload))
        notifyAndFreeInfo(header, info, &payload);

    ret = death_pipe[0];
    return ret;

err_close:
    EINTR_LOOP(ret, close(death_pipe[0]));
    EINTR_LOOP(ret, close(death_pipe[1]));

err_free:
    /* free the info pointer */
    freeInfo(header, info);

out:
    return -1;
}
| 892 | #endif // _POSIX_SPAWN && !FORKFD_NO_SPAWNFD |
| 893 | |
| 894 | int forkfd_wait4(int ffd, struct forkfd_info *info, int options, struct rusage *rusage) |
| 895 | { |
| 896 | struct pipe_payload payload; |
| 897 | int ret; |
| 898 | |
| 899 | if (system_has_forkfd()) { |
| 900 | /* if this is one of our pipes, not a procdesc/pidfd, we'll get an EBADF */ |
| 901 | ret = system_forkfd_wait(ffd, info, ffdwoptions: options, rusage); |
| 902 | if (disable_fork_fallback() || ret != -1 || errno != EBADF) |
| 903 | return ret; |
| 904 | } |
| 905 | |
| 906 | ret = read(fd: ffd, buf: &payload, nbytes: sizeof(payload)); |
| 907 | if (ret == -1) |
| 908 | return ret; /* pass errno, probably EINTR, EBADF or EWOULDBLOCK */ |
| 909 | |
| 910 | assert(ret == sizeof(payload)); |
| 911 | if (info) |
| 912 | *info = payload.info; |
| 913 | if (rusage) |
| 914 | *rusage = payload.rusage; |
| 915 | |
| 916 | return 0; /* success */ |
| 917 | } |
| 918 | |
| 919 | |
/* Releases a file descriptor obtained from forkfd(), vforkfd() or spawnfd().
 * Equivalent to close(2); provided for API symmetry. Returns close(2)'s
 * result (0 on success, -1 with errno set on error).
 * (fix: removed stray "fd:" parameter-name artifact) */
int forkfd_close(int ffd)
{
    return close(ffd);
}
| 924 | |
| 925 | #if defined(__FreeBSD__) && __FreeBSD__ >= 9 |
| 926 | # include "forkfd_freebsd.c" |
| 927 | #elif defined(__linux__) |
| 928 | # include "forkfd_linux.c" |
| 929 | #else |
/* Generic fallback for platforms with no native forkfd-like facility
 * (no FreeBSD procdesc, no Linux pidfd): report that none exists.
 * Fix: use a proper (void) prototype instead of an unprototyped "()"
 * parameter list. */
int system_has_forkfd(void)
{
    return 0;
}
| 934 | |
/* Generic fallback: no native forkfd implementation is available on this
 * platform. Always fails, and tells the caller (via *system = 0) to use
 * the fork()-based emulation instead. */
int system_forkfd(int flags, pid_t *ppid, int *system)
{
    /* parameters are unused on this platform */
    (void) flags;
    (void) ppid;

    *system = 0;        /* native path does not work here */
    return -1;
}
| 942 | |
/* Generic fallback: there is no native forkfd on this platform, so there
 * is never a native descriptor to wait on. Always fails; forkfd_wait4()
 * handles the pipe-based descriptors itself. */
int system_forkfd_wait(int ffd, struct forkfd_info *info, int options, struct rusage *rusage)
{
    /* all parameters are unused */
    (void) ffd;
    (void) info;
    (void) options;
    (void) rusage;

    return -1;
}
| 951 | #endif |
| 952 | #ifndef SYSTEM_FORKFD_CAN_VFORK |
| 953 | int system_vforkfd(int flags, pid_t *ppid, int (*childFn)(void *), void *token, int *system) |
| 954 | { |
| 955 | /* we don't have a way to vfork(), so fake it */ |
| 956 | int ret = system_forkfd(flags, ppid, system); |
| 957 | if (ret == FFD_CHILD_PROCESS) { |
| 958 | /* child process */ |
| 959 | _exit(childFn(token)); |
| 960 | } |
| 961 | return ret; |
| 962 | } |
| 963 | #endif |
| 964 | #undef SYSTEM_FORKFD_CAN_VFORK |
| 965 | |