1 | /* Guts of both `select' and `poll' for Hurd. |
2 | Copyright (C) 1991-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sys/time.h> |
20 | #include <sys/types.h> |
21 | #include <sys/poll.h> |
22 | #include <hurd.h> |
23 | #include <hurd/fd.h> |
24 | #include <hurd/io_request.h> |
25 | #include <stdlib.h> |
26 | #include <string.h> |
27 | #include <assert.h> |
28 | #include <stdint.h> |
29 | #include <limits.h> |
30 | #include <time.h> |
31 | #include <sysdep-cancel.h> |
32 | |
33 | /* All user select types: the union of the SELECT_READ, SELECT_WRITE and SELECT_URG request bits. */ |
34 | #define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG) |
35 | |
36 | /* Used to record that a particular select rpc returned. Must be distinct |
37 | from SELECT_ALL (which better not have the high bit set). SELECT_ERROR is SELECT_RETURNED shifted one position further left; _hurd_select sets it on an entry whose error code it has recorded. */ |
38 | #define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL) |
39 | #define SELECT_ERROR (SELECT_RETURNED << 1) |
40 | |
41 | /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in |
42 | each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not |
43 | NULL, time out after waiting the interval specified therein. Returns |
44 | the number of ready descriptors, or -1 for errors. */ |
45 | int |
46 | _hurd_select (int nfds, |
47 | struct pollfd *pollfds, |
48 | fd_set *readfds, fd_set *writefds, fd_set *exceptfds, |
49 | const struct timespec *timeout, const sigset_t *sigmask) |
50 | { |
51 | int i; |
52 | mach_port_t portset, sigport; |
53 | int got, ready; |
54 | error_t err; |
55 | fd_set rfds, wfds, xfds; |
56 | int firstfd, lastfd; |
57 | mach_msg_id_t reply_msgid; |
58 | mach_msg_timeout_t to; |
59 | struct timespec ts; |
60 | struct |
61 | { |
62 | struct hurd_userlink ulink; |
63 | struct hurd_fd *cell; |
64 | mach_port_t io_port; |
65 | int type; |
66 | mach_port_t reply_port; |
67 | int error; |
68 | } d[nfds]; |
69 | sigset_t oset; |
70 | struct hurd_sigstate *ss = NULL; |
71 | |
72 | union typeword /* Use this to avoid unkosher casts. */ |
73 | { |
74 | mach_msg_type_t type; |
75 | uint32_t word; |
76 | }; |
77 | assert (sizeof (union typeword) == sizeof (mach_msg_type_t)); |
78 | assert (sizeof (uint32_t) == sizeof (mach_msg_type_t)); |
79 | |
80 | if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE)) |
81 | { |
82 | errno = EINVAL; |
83 | return -1; |
84 | } |
85 | |
86 | #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */ |
87 | #define IO_SELECT_TIMEOUT_REPLY_MSGID (21031 + 100) /* XXX */ |
88 | |
89 | if (timeout == NULL) |
90 | reply_msgid = IO_SELECT_REPLY_MSGID; |
91 | else |
92 | { |
93 | struct timespec now; |
94 | |
95 | if (timeout->tv_sec < 0 || ! valid_nanoseconds (ns: timeout->tv_nsec)) |
96 | { |
97 | errno = EINVAL; |
98 | return -1; |
99 | } |
100 | |
101 | err = __clock_gettime (CLOCK_REALTIME, &now); |
102 | if (err) |
103 | return -1; |
104 | |
105 | ts.tv_sec = now.tv_sec + timeout->tv_sec; |
106 | ts.tv_nsec = now.tv_nsec + timeout->tv_nsec; |
107 | |
108 | if (ts.tv_nsec >= 1000000000) |
109 | { |
110 | ts.tv_sec++; |
111 | ts.tv_nsec -= 1000000000; |
112 | } |
113 | |
114 | if (ts.tv_sec < 0) |
115 | ts.tv_sec = LONG_MAX; /* XXX */ |
116 | |
117 | reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID; |
118 | } |
119 | |
120 | if (sigmask) |
121 | { |
122 | /* Add a port to the portset for the case when we get the signal even |
123 | before calling __mach_msg. */ |
124 | |
125 | sigport = __mach_reply_port (); |
126 | |
127 | ss = _hurd_self_sigstate (); |
128 | _hurd_sigstate_lock (ss); |
129 | /* And tell the signal thread to message us when a signal arrives. */ |
130 | ss->suspended = sigport; |
131 | _hurd_sigstate_unlock (ss); |
132 | |
133 | if (__sigprocmask (SIG_SETMASK, set: sigmask, oset: &oset)) |
134 | { |
135 | _hurd_sigstate_lock (ss); |
136 | ss->suspended = MACH_PORT_NULL; |
137 | _hurd_sigstate_unlock (ss); |
138 | __mach_port_destroy (__mach_task_self (), sigport); |
139 | return -1; |
140 | } |
141 | } |
142 | else |
143 | sigport = MACH_PORT_NULL; |
144 | |
145 | if (pollfds) |
146 | { |
147 | int error = 0; |
148 | /* Collect interesting descriptors from the user's `pollfd' array. |
149 | We do a first pass that reads the user's array before taking |
150 | any locks. The second pass then only touches our own stack, |
151 | and gets the port references. */ |
152 | |
153 | for (i = 0; i < nfds; ++i) |
154 | if (pollfds[i].fd >= 0) |
155 | { |
156 | int type = 0; |
157 | if (pollfds[i].events & POLLIN) |
158 | type |= SELECT_READ; |
159 | if (pollfds[i].events & POLLOUT) |
160 | type |= SELECT_WRITE; |
161 | if (pollfds[i].events & POLLPRI) |
162 | type |= SELECT_URG; |
163 | |
164 | d[i].io_port = pollfds[i].fd; |
165 | d[i].type = type; |
166 | } |
167 | else |
168 | d[i].type = 0; |
169 | |
170 | HURD_CRITICAL_BEGIN; |
171 | __mutex_lock (&_hurd_dtable_lock); |
172 | |
173 | for (i = 0; i < nfds; ++i) |
174 | if (d[i].type != 0) |
175 | { |
176 | const int fd = (int) d[i].io_port; |
177 | |
178 | if (fd < _hurd_dtablesize) |
179 | { |
180 | d[i].cell = _hurd_dtable[fd]; |
181 | if (d[i].cell != NULL) |
182 | { |
183 | d[i].io_port = _hurd_port_get (&d[i].cell->port, |
184 | &d[i].ulink); |
185 | if (d[i].io_port != MACH_PORT_NULL) |
186 | continue; |
187 | } |
188 | } |
189 | |
190 | /* Bogus descriptor, make it EBADF already. */ |
191 | d[i].error = EBADF; |
192 | d[i].type = SELECT_ERROR; |
193 | error = 1; |
194 | } |
195 | |
196 | __mutex_unlock (&_hurd_dtable_lock); |
197 | HURD_CRITICAL_END; |
198 | |
199 | if (error) |
200 | { |
201 | /* Set timeout to 0. */ |
202 | err = __clock_gettime (CLOCK_REALTIME, &ts); |
203 | if (err) |
204 | { |
205 | /* Really bad luck. */ |
206 | err = errno; |
207 | HURD_CRITICAL_BEGIN; |
208 | __mutex_lock (&_hurd_dtable_lock); |
209 | while (i-- > 0) |
210 | if (d[i].type & ~SELECT_ERROR != 0) |
211 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, |
212 | d[i].io_port); |
213 | __mutex_unlock (&_hurd_dtable_lock); |
214 | HURD_CRITICAL_END; |
215 | if (sigmask) |
216 | __sigprocmask (SIG_SETMASK, set: &oset, NULL); |
217 | errno = err; |
218 | return -1; |
219 | } |
220 | reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID; |
221 | } |
222 | |
223 | lastfd = i - 1; |
224 | firstfd = i == 0 ? lastfd : 0; |
225 | } |
226 | else |
227 | { |
228 | /* Collect interested descriptors from the user's fd_set arguments. |
229 | Use local copies so we can't crash from user bogosity. */ |
230 | |
231 | if (readfds == NULL) |
232 | FD_ZERO (&rfds); |
233 | else |
234 | rfds = *readfds; |
235 | if (writefds == NULL) |
236 | FD_ZERO (&wfds); |
237 | else |
238 | wfds = *writefds; |
239 | if (exceptfds == NULL) |
240 | FD_ZERO (&xfds); |
241 | else |
242 | xfds = *exceptfds; |
243 | |
244 | HURD_CRITICAL_BEGIN; |
245 | __mutex_lock (&_hurd_dtable_lock); |
246 | |
247 | /* Collect the ports for interesting FDs. */ |
248 | firstfd = lastfd = -1; |
249 | for (i = 0; i < nfds; ++i) |
250 | { |
251 | int type = 0; |
252 | if (readfds != NULL && FD_ISSET (i, &rfds)) |
253 | type |= SELECT_READ; |
254 | if (writefds != NULL && FD_ISSET (i, &wfds)) |
255 | type |= SELECT_WRITE; |
256 | if (exceptfds != NULL && FD_ISSET (i, &xfds)) |
257 | type |= SELECT_URG; |
258 | d[i].type = type; |
259 | if (type) |
260 | { |
261 | if (i < _hurd_dtablesize) |
262 | { |
263 | d[i].cell = _hurd_dtable[i]; |
264 | if (d[i].cell != NULL) |
265 | d[i].io_port = _hurd_port_get (&d[i].cell->port, |
266 | &d[i].ulink); |
267 | } |
268 | if (i >= _hurd_dtablesize || d[i].cell == NULL || |
269 | d[i].io_port == MACH_PORT_NULL) |
270 | { |
271 | /* If one descriptor is bogus, we fail completely. */ |
272 | while (i-- > 0) |
273 | if (d[i].type != 0) |
274 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, |
275 | d[i].io_port); |
276 | break; |
277 | } |
278 | lastfd = i; |
279 | if (firstfd == -1) |
280 | firstfd = i; |
281 | } |
282 | } |
283 | |
284 | __mutex_unlock (&_hurd_dtable_lock); |
285 | HURD_CRITICAL_END; |
286 | |
287 | if (i < nfds) |
288 | { |
289 | if (sigmask) |
290 | __sigprocmask (SIG_SETMASK, set: &oset, NULL); |
291 | errno = EBADF; |
292 | return -1; |
293 | } |
294 | |
295 | if (nfds > _hurd_dtablesize) |
296 | nfds = _hurd_dtablesize; |
297 | } |
298 | |
299 | |
300 | err = 0; |
301 | got = 0; |
302 | |
303 | /* Send them all io_select request messages. */ |
304 | |
305 | if (firstfd == -1) |
306 | { |
307 | if (sigport == MACH_PORT_NULL) |
308 | /* But not if there were no ports to deal with at all. |
309 | We are just a pure timeout. */ |
310 | portset = __mach_reply_port (); |
311 | else |
312 | portset = sigport; |
313 | } |
314 | else |
315 | { |
316 | portset = MACH_PORT_NULL; |
317 | |
318 | for (i = firstfd; i <= lastfd; ++i) |
319 | if (!(d[i].type & ~SELECT_ERROR)) |
320 | d[i].reply_port = MACH_PORT_NULL; |
321 | else |
322 | { |
323 | int type = d[i].type; |
324 | d[i].reply_port = __mach_reply_port (); |
325 | if (timeout == NULL) |
326 | err = __io_select_request (d[i].io_port, d[i].reply_port, type); |
327 | else |
328 | err = __io_select_timeout_request (d[i].io_port, d[i].reply_port, |
329 | ts, type); |
330 | if (!err) |
331 | { |
332 | if (firstfd == lastfd && sigport == MACH_PORT_NULL) |
333 | /* When there's a single descriptor, we don't need a |
334 | portset, so just pretend we have one, but really |
335 | use the single reply port. */ |
336 | portset = d[i].reply_port; |
337 | else if (got == 0) |
338 | /* We've got multiple reply ports, so we need a port set to |
339 | multiplex them. */ |
340 | { |
341 | /* We will wait again for a reply later. */ |
342 | if (portset == MACH_PORT_NULL) |
343 | /* Create the portset to receive all the replies on. */ |
344 | err = __mach_port_allocate (__mach_task_self (), |
345 | MACH_PORT_RIGHT_PORT_SET, |
346 | &portset); |
347 | if (! err) |
348 | /* Put this reply port in the port set. */ |
349 | __mach_port_move_member (__mach_task_self (), |
350 | d[i].reply_port, portset); |
351 | } |
352 | } |
353 | else |
354 | { |
355 | /* No error should happen, but record it for later |
356 | processing. */ |
357 | d[i].error = err; |
358 | d[i].type |= SELECT_ERROR; |
359 | ++got; |
360 | } |
361 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port); |
362 | } |
363 | |
364 | if (got == 0 && sigport != MACH_PORT_NULL) |
365 | { |
366 | if (portset == MACH_PORT_NULL) |
367 | /* Create the portset to receive the signal message on. */ |
368 | __mach_port_allocate (__mach_task_self (), MACH_PORT_RIGHT_PORT_SET, |
369 | &portset); |
370 | /* Put the signal reply port in the port set. */ |
371 | __mach_port_move_member (__mach_task_self (), sigport, portset); |
372 | } |
373 | } |
374 | |
375 | /* GOT is the number of replies (or errors), while READY is the number of |
376 | replies with at least one type bit set. */ |
377 | ready = 0; |
378 | |
379 | /* Now wait for reply messages. */ |
380 | if (!err && got == 0) |
381 | { |
382 | /* Now wait for io_select_reply messages on PORT, |
383 | timing out as appropriate. */ |
384 | |
385 | union |
386 | { |
387 | mach_msg_header_t head; |
388 | #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE |
389 | struct |
390 | { |
391 | mach_msg_header_t head; |
392 | NDR_record_t ndr; |
393 | error_t err; |
394 | } error; |
395 | struct |
396 | { |
397 | mach_msg_header_t head; |
398 | NDR_record_t ndr; |
399 | error_t err; |
400 | int result; |
401 | mach_msg_trailer_t trailer; |
402 | } success; |
403 | #else |
404 | struct |
405 | { |
406 | mach_msg_header_t head; |
407 | union typeword err_type; |
408 | error_t err; |
409 | } error; |
410 | struct |
411 | { |
412 | mach_msg_header_t head; |
413 | union typeword err_type; |
414 | error_t err; |
415 | union typeword result_type; |
416 | int result; |
417 | } success; |
418 | #endif |
419 | } msg; |
420 | mach_msg_option_t options; |
421 | error_t msgerr; |
422 | |
423 | /* We rely on servers to implement the timeout, but when there are none, |
424 | do it on the client side. */ |
425 | if (timeout != NULL && firstfd == -1) |
426 | { |
427 | options = MACH_RCV_TIMEOUT; |
428 | to = timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000; |
429 | } |
430 | else |
431 | { |
432 | options = 0; |
433 | to = MACH_MSG_TIMEOUT_NONE; |
434 | } |
435 | |
436 | int cancel_oldtype = LIBC_CANCEL_ASYNC(); |
437 | while ((msgerr = __mach_msg (&msg.head, |
438 | MACH_RCV_MSG | MACH_RCV_INTERRUPT | options, |
439 | 0, sizeof msg, portset, to, |
440 | MACH_PORT_NULL)) == MACH_MSG_SUCCESS) |
441 | { |
442 | LIBC_CANCEL_RESET (cancel_oldtype); |
443 | |
444 | /* We got a message. Decode it. */ |
445 | #ifdef MACH_MSG_TYPE_BIT |
446 | const union typeword inttype = |
447 | { type: |
448 | { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 } |
449 | }; |
450 | #endif |
451 | |
452 | if (sigport != MACH_PORT_NULL && sigport == msg.head.msgh_local_port) |
453 | { |
454 | /* We actually got interrupted by a signal before |
455 | __mach_msg; poll for further responses and then |
456 | return quickly. */ |
457 | err = EINTR; |
458 | goto poll; |
459 | } |
460 | |
461 | if (msg.head.msgh_id == reply_msgid |
462 | && msg.head.msgh_size >= sizeof msg.error |
463 | && !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) |
464 | #ifdef MACH_MSG_TYPE_BIT |
465 | && msg.error.err_type.word == inttype.word |
466 | #endif |
467 | ) |
468 | { |
469 | /* This is a properly formatted message so far. |
470 | See if it is a success or a failure. */ |
471 | if (msg.error.err == EINTR |
472 | && msg.head.msgh_size == sizeof msg.error) |
473 | { |
474 | /* EINTR response; poll for further responses |
475 | and then return quickly. */ |
476 | err = EINTR; |
477 | goto poll; |
478 | } |
479 | /* Keep in mind msg.success.result can be 0 if a timeout |
480 | occurred. */ |
481 | if (msg.error.err |
482 | #ifdef MACH_MSG_TYPE_BIT |
483 | || msg.success.result_type.word != inttype.word |
484 | #endif |
485 | || msg.head.msgh_size != sizeof msg.success) |
486 | { |
487 | /* Error or bogus reply. */ |
488 | if (!msg.error.err) |
489 | msg.error.err = EIO; |
490 | __mach_msg_destroy (&msg.head); |
491 | } |
492 | |
493 | /* Look up the respondent's reply port and record its |
494 | readiness. */ |
495 | { |
496 | int had = got; |
497 | if (firstfd != -1) |
498 | for (i = firstfd; i <= lastfd; ++i) |
499 | if (d[i].type |
500 | && d[i].reply_port == msg.head.msgh_local_port) |
501 | { |
502 | if (msg.error.err) |
503 | { |
504 | d[i].error = msg.error.err; |
505 | d[i].type = SELECT_ERROR; |
506 | ++ready; |
507 | } |
508 | else |
509 | { |
510 | d[i].type &= msg.success.result; |
511 | if (d[i].type) |
512 | ++ready; |
513 | } |
514 | |
515 | d[i].type |= SELECT_RETURNED; |
516 | ++got; |
517 | } |
518 | assert (got > had); |
519 | } |
520 | } |
521 | |
522 | if (msg.head.msgh_remote_port != MACH_PORT_NULL) |
523 | __mach_port_deallocate (__mach_task_self (), |
524 | msg.head.msgh_remote_port); |
525 | |
526 | if (got) |
527 | poll: |
528 | { |
529 | /* Poll for another message. */ |
530 | to = 0; |
531 | options |= MACH_RCV_TIMEOUT; |
532 | } |
533 | } |
534 | LIBC_CANCEL_RESET (cancel_oldtype); |
535 | |
536 | if (msgerr == MACH_RCV_INTERRUPTED) |
537 | /* Interruption on our side (e.g. signal reception). */ |
538 | err = EINTR; |
539 | |
540 | if (ready) |
541 | /* At least one descriptor is known to be ready now, so we will |
542 | return success. */ |
543 | err = 0; |
544 | } |
545 | |
546 | if (firstfd != -1) |
547 | for (i = firstfd; i <= lastfd; ++i) |
548 | if (d[i].reply_port != MACH_PORT_NULL) |
549 | __mach_port_destroy (__mach_task_self (), d[i].reply_port); |
550 | |
551 | if (sigport != MACH_PORT_NULL) |
552 | { |
553 | _hurd_sigstate_lock (ss); |
554 | ss->suspended = MACH_PORT_NULL; |
555 | _hurd_sigstate_unlock (ss); |
556 | __mach_port_destroy (__mach_task_self (), sigport); |
557 | } |
558 | |
559 | if ((firstfd == -1 && sigport == MACH_PORT_NULL) |
560 | || ((firstfd != lastfd || sigport != MACH_PORT_NULL) && portset != MACH_PORT_NULL)) |
561 | /* Destroy PORTSET, but only if it's not actually the reply port for a |
562 | single descriptor (in which case it's destroyed in the previous loop; |
563 | not doing it here is just a bit more efficient). */ |
564 | __mach_port_destroy (__mach_task_self (), portset); |
565 | |
566 | if (err) |
567 | { |
568 | if (sigmask) |
569 | __sigprocmask (SIG_SETMASK, set: &oset, NULL); |
570 | return __hurd_fail (err); |
571 | } |
572 | |
573 | if (pollfds) |
574 | /* Fill in the `revents' members of the user's array. */ |
575 | for (i = 0; i < nfds; ++i) |
576 | { |
577 | int type = d[i].type; |
578 | int_fast16_t revents = 0; |
579 | |
580 | if (type & SELECT_ERROR) |
581 | switch (d[i].error) |
582 | { |
583 | case EPIPE: |
584 | revents = POLLHUP; |
585 | break; |
586 | case EBADF: |
587 | revents = POLLNVAL; |
588 | break; |
589 | default: |
590 | revents = POLLERR; |
591 | break; |
592 | } |
593 | else |
594 | if (type & SELECT_RETURNED) |
595 | { |
596 | if (type & SELECT_READ) |
597 | revents |= POLLIN; |
598 | if (type & SELECT_WRITE) |
599 | revents |= POLLOUT; |
600 | if (type & SELECT_URG) |
601 | revents |= POLLPRI; |
602 | } |
603 | |
604 | pollfds[i].revents = revents; |
605 | } |
606 | else |
607 | { |
608 | /* Below we recalculate READY to include an increment for each operation |
609 | allowed on each fd. */ |
610 | ready = 0; |
611 | |
612 | /* Set the user bitarrays. We only ever have to clear bits, as all |
613 | desired ones are initially set. */ |
614 | if (firstfd != -1) |
615 | for (i = firstfd; i <= lastfd; ++i) |
616 | { |
617 | int type = d[i].type; |
618 | |
619 | if ((type & SELECT_RETURNED) == 0) |
620 | type = 0; |
621 | |
622 | /* Callers of select don't expect to see errors, so we simulate |
623 | readiness of the erring object and the next call hopefully |
624 | will get the error again. */ |
625 | if (type & SELECT_ERROR) |
626 | { |
627 | type = 0; |
628 | if (readfds != NULL && FD_ISSET (i, readfds)) |
629 | type |= SELECT_READ; |
630 | if (writefds != NULL && FD_ISSET (i, writefds)) |
631 | type |= SELECT_WRITE; |
632 | if (exceptfds != NULL && FD_ISSET (i, exceptfds)) |
633 | type |= SELECT_URG; |
634 | } |
635 | |
636 | if (type & SELECT_READ) |
637 | ready++; |
638 | else if (readfds) |
639 | FD_CLR (i, readfds); |
640 | if (type & SELECT_WRITE) |
641 | ready++; |
642 | else if (writefds) |
643 | FD_CLR (i, writefds); |
644 | if (type & SELECT_URG) |
645 | ready++; |
646 | else if (exceptfds) |
647 | FD_CLR (i, exceptfds); |
648 | } |
649 | } |
650 | |
651 | if (sigmask && __sigprocmask (SIG_SETMASK, set: &oset, NULL)) |
652 | return -1; |
653 | |
654 | return ready; |
655 | } |
656 | |