/* Guts of both `select' and `poll' for Hurd.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19#include <sys/time.h>
20#include <sys/types.h>
21#include <sys/poll.h>
22#include <hurd.h>
23#include <hurd/fd.h>
24#include <hurd/io_request.h>
25#include <stdlib.h>
26#include <string.h>
27#include <assert.h>
28#include <stdint.h>
29#include <limits.h>
30#include <time.h>
31#include <sysdep-cancel.h>
32
/* All user select types.  */
#define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG)

/* Used to record that a particular select rpc returned.  Must be distinct
   from SELECT_ALL (which better not have the high bit set).  The
   expression picks the bits of SELECT_ALL shifted left by one that are
   not themselves part of SELECT_ALL.  */
#define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL)

/* Used to record that a descriptor has an error code stored alongside its
   type word.  It is the bit just above SELECT_RETURNED, so it can be
   or-ed together with the request bits and with SELECT_RETURNED.  */
#define SELECT_ERROR (SELECT_RETURNED << 1)
40
41/* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in
42 each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not
43 NULL, time out after waiting the interval specified therein. Returns
44 the number of ready descriptors, or -1 for errors. */
45int
46_hurd_select (int nfds,
47 struct pollfd *pollfds,
48 fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
49 const struct timespec *timeout, const sigset_t *sigmask)
50{
51 int i;
52 mach_port_t portset, sigport;
53 int got, ready;
54 error_t err;
55 fd_set rfds, wfds, xfds;
56 int firstfd, lastfd;
57 mach_msg_id_t reply_msgid;
58 mach_msg_timeout_t to;
59 struct timespec ts;
60 struct
61 {
62 struct hurd_userlink ulink;
63 struct hurd_fd *cell;
64 mach_port_t io_port;
65 int type;
66 mach_port_t reply_port;
67 int error;
68 } d[nfds];
69 sigset_t oset;
70 struct hurd_sigstate *ss = NULL;
71
72 union typeword /* Use this to avoid unkosher casts. */
73 {
74 mach_msg_type_t type;
75 uint32_t word;
76 };
77 assert (sizeof (union typeword) == sizeof (mach_msg_type_t));
78 assert (sizeof (uint32_t) == sizeof (mach_msg_type_t));
79
80 if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE))
81 {
82 errno = EINVAL;
83 return -1;
84 }
85
86#define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */
87#define IO_SELECT_TIMEOUT_REPLY_MSGID (21031 + 100) /* XXX */
88
89 if (timeout == NULL)
90 reply_msgid = IO_SELECT_REPLY_MSGID;
91 else
92 {
93 struct timespec now;
94
95 if (timeout->tv_sec < 0 || ! valid_nanoseconds (ns: timeout->tv_nsec))
96 {
97 errno = EINVAL;
98 return -1;
99 }
100
101 err = __clock_gettime (CLOCK_REALTIME, &now);
102 if (err)
103 return -1;
104
105 ts.tv_sec = now.tv_sec + timeout->tv_sec;
106 ts.tv_nsec = now.tv_nsec + timeout->tv_nsec;
107
108 if (ts.tv_nsec >= 1000000000)
109 {
110 ts.tv_sec++;
111 ts.tv_nsec -= 1000000000;
112 }
113
114 if (ts.tv_sec < 0)
115 ts.tv_sec = LONG_MAX; /* XXX */
116
117 reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
118 }
119
120 if (sigmask)
121 {
122 /* Add a port to the portset for the case when we get the signal even
123 before calling __mach_msg. */
124
125 sigport = __mach_reply_port ();
126
127 ss = _hurd_self_sigstate ();
128 _hurd_sigstate_lock (ss);
129 /* And tell the signal thread to message us when a signal arrives. */
130 ss->suspended = sigport;
131 _hurd_sigstate_unlock (ss);
132
133 if (__sigprocmask (SIG_SETMASK, set: sigmask, oset: &oset))
134 {
135 _hurd_sigstate_lock (ss);
136 ss->suspended = MACH_PORT_NULL;
137 _hurd_sigstate_unlock (ss);
138 __mach_port_destroy (__mach_task_self (), sigport);
139 return -1;
140 }
141 }
142 else
143 sigport = MACH_PORT_NULL;
144
145 if (pollfds)
146 {
147 int error = 0;
148 /* Collect interesting descriptors from the user's `pollfd' array.
149 We do a first pass that reads the user's array before taking
150 any locks. The second pass then only touches our own stack,
151 and gets the port references. */
152
153 for (i = 0; i < nfds; ++i)
154 if (pollfds[i].fd >= 0)
155 {
156 int type = 0;
157 if (pollfds[i].events & POLLIN)
158 type |= SELECT_READ;
159 if (pollfds[i].events & POLLOUT)
160 type |= SELECT_WRITE;
161 if (pollfds[i].events & POLLPRI)
162 type |= SELECT_URG;
163
164 d[i].io_port = pollfds[i].fd;
165 d[i].type = type;
166 }
167 else
168 d[i].type = 0;
169
170 HURD_CRITICAL_BEGIN;
171 __mutex_lock (&_hurd_dtable_lock);
172
173 for (i = 0; i < nfds; ++i)
174 if (d[i].type != 0)
175 {
176 const int fd = (int) d[i].io_port;
177
178 if (fd < _hurd_dtablesize)
179 {
180 d[i].cell = _hurd_dtable[fd];
181 if (d[i].cell != NULL)
182 {
183 d[i].io_port = _hurd_port_get (&d[i].cell->port,
184 &d[i].ulink);
185 if (d[i].io_port != MACH_PORT_NULL)
186 continue;
187 }
188 }
189
190 /* Bogus descriptor, make it EBADF already. */
191 d[i].error = EBADF;
192 d[i].type = SELECT_ERROR;
193 error = 1;
194 }
195
196 __mutex_unlock (&_hurd_dtable_lock);
197 HURD_CRITICAL_END;
198
199 if (error)
200 {
201 /* Set timeout to 0. */
202 err = __clock_gettime (CLOCK_REALTIME, &ts);
203 if (err)
204 {
205 /* Really bad luck. */
206 err = errno;
207 HURD_CRITICAL_BEGIN;
208 __mutex_lock (&_hurd_dtable_lock);
209 while (i-- > 0)
210 if (d[i].type & ~SELECT_ERROR != 0)
211 _hurd_port_free (&d[i].cell->port, &d[i].ulink,
212 d[i].io_port);
213 __mutex_unlock (&_hurd_dtable_lock);
214 HURD_CRITICAL_END;
215 if (sigmask)
216 __sigprocmask (SIG_SETMASK, set: &oset, NULL);
217 errno = err;
218 return -1;
219 }
220 reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
221 }
222
223 lastfd = i - 1;
224 firstfd = i == 0 ? lastfd : 0;
225 }
226 else
227 {
228 /* Collect interested descriptors from the user's fd_set arguments.
229 Use local copies so we can't crash from user bogosity. */
230
231 if (readfds == NULL)
232 FD_ZERO (&rfds);
233 else
234 rfds = *readfds;
235 if (writefds == NULL)
236 FD_ZERO (&wfds);
237 else
238 wfds = *writefds;
239 if (exceptfds == NULL)
240 FD_ZERO (&xfds);
241 else
242 xfds = *exceptfds;
243
244 HURD_CRITICAL_BEGIN;
245 __mutex_lock (&_hurd_dtable_lock);
246
247 /* Collect the ports for interesting FDs. */
248 firstfd = lastfd = -1;
249 for (i = 0; i < nfds; ++i)
250 {
251 int type = 0;
252 if (readfds != NULL && FD_ISSET (i, &rfds))
253 type |= SELECT_READ;
254 if (writefds != NULL && FD_ISSET (i, &wfds))
255 type |= SELECT_WRITE;
256 if (exceptfds != NULL && FD_ISSET (i, &xfds))
257 type |= SELECT_URG;
258 d[i].type = type;
259 if (type)
260 {
261 if (i < _hurd_dtablesize)
262 {
263 d[i].cell = _hurd_dtable[i];
264 if (d[i].cell != NULL)
265 d[i].io_port = _hurd_port_get (&d[i].cell->port,
266 &d[i].ulink);
267 }
268 if (i >= _hurd_dtablesize || d[i].cell == NULL ||
269 d[i].io_port == MACH_PORT_NULL)
270 {
271 /* If one descriptor is bogus, we fail completely. */
272 while (i-- > 0)
273 if (d[i].type != 0)
274 _hurd_port_free (&d[i].cell->port, &d[i].ulink,
275 d[i].io_port);
276 break;
277 }
278 lastfd = i;
279 if (firstfd == -1)
280 firstfd = i;
281 }
282 }
283
284 __mutex_unlock (&_hurd_dtable_lock);
285 HURD_CRITICAL_END;
286
287 if (i < nfds)
288 {
289 if (sigmask)
290 __sigprocmask (SIG_SETMASK, set: &oset, NULL);
291 errno = EBADF;
292 return -1;
293 }
294
295 if (nfds > _hurd_dtablesize)
296 nfds = _hurd_dtablesize;
297 }
298
299
300 err = 0;
301 got = 0;
302
303 /* Send them all io_select request messages. */
304
305 if (firstfd == -1)
306 {
307 if (sigport == MACH_PORT_NULL)
308 /* But not if there were no ports to deal with at all.
309 We are just a pure timeout. */
310 portset = __mach_reply_port ();
311 else
312 portset = sigport;
313 }
314 else
315 {
316 portset = MACH_PORT_NULL;
317
318 for (i = firstfd; i <= lastfd; ++i)
319 if (!(d[i].type & ~SELECT_ERROR))
320 d[i].reply_port = MACH_PORT_NULL;
321 else
322 {
323 int type = d[i].type;
324 d[i].reply_port = __mach_reply_port ();
325 if (timeout == NULL)
326 err = __io_select_request (d[i].io_port, d[i].reply_port, type);
327 else
328 err = __io_select_timeout_request (d[i].io_port, d[i].reply_port,
329 ts, type);
330 if (!err)
331 {
332 if (firstfd == lastfd && sigport == MACH_PORT_NULL)
333 /* When there's a single descriptor, we don't need a
334 portset, so just pretend we have one, but really
335 use the single reply port. */
336 portset = d[i].reply_port;
337 else if (got == 0)
338 /* We've got multiple reply ports, so we need a port set to
339 multiplex them. */
340 {
341 /* We will wait again for a reply later. */
342 if (portset == MACH_PORT_NULL)
343 /* Create the portset to receive all the replies on. */
344 err = __mach_port_allocate (__mach_task_self (),
345 MACH_PORT_RIGHT_PORT_SET,
346 &portset);
347 if (! err)
348 /* Put this reply port in the port set. */
349 __mach_port_move_member (__mach_task_self (),
350 d[i].reply_port, portset);
351 }
352 }
353 else
354 {
355 /* No error should happen, but record it for later
356 processing. */
357 d[i].error = err;
358 d[i].type |= SELECT_ERROR;
359 ++got;
360 }
361 _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port);
362 }
363
364 if (got == 0 && sigport != MACH_PORT_NULL)
365 {
366 if (portset == MACH_PORT_NULL)
367 /* Create the portset to receive the signal message on. */
368 __mach_port_allocate (__mach_task_self (), MACH_PORT_RIGHT_PORT_SET,
369 &portset);
370 /* Put the signal reply port in the port set. */
371 __mach_port_move_member (__mach_task_self (), sigport, portset);
372 }
373 }
374
375 /* GOT is the number of replies (or errors), while READY is the number of
376 replies with at least one type bit set. */
377 ready = 0;
378
379 /* Now wait for reply messages. */
380 if (!err && got == 0)
381 {
382 /* Now wait for io_select_reply messages on PORT,
383 timing out as appropriate. */
384
385 union
386 {
387 mach_msg_header_t head;
388#ifdef MACH_MSG_TRAILER_MINIMUM_SIZE
389 struct
390 {
391 mach_msg_header_t head;
392 NDR_record_t ndr;
393 error_t err;
394 } error;
395 struct
396 {
397 mach_msg_header_t head;
398 NDR_record_t ndr;
399 error_t err;
400 int result;
401 mach_msg_trailer_t trailer;
402 } success;
403#else
404 struct
405 {
406 mach_msg_header_t head;
407 union typeword err_type;
408 error_t err;
409 } error;
410 struct
411 {
412 mach_msg_header_t head;
413 union typeword err_type;
414 error_t err;
415 union typeword result_type;
416 int result;
417 } success;
418#endif
419 } msg;
420 mach_msg_option_t options;
421 error_t msgerr;
422
423 /* We rely on servers to implement the timeout, but when there are none,
424 do it on the client side. */
425 if (timeout != NULL && firstfd == -1)
426 {
427 options = MACH_RCV_TIMEOUT;
428 to = timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000;
429 }
430 else
431 {
432 options = 0;
433 to = MACH_MSG_TIMEOUT_NONE;
434 }
435
436 int cancel_oldtype = LIBC_CANCEL_ASYNC();
437 while ((msgerr = __mach_msg (&msg.head,
438 MACH_RCV_MSG | MACH_RCV_INTERRUPT | options,
439 0, sizeof msg, portset, to,
440 MACH_PORT_NULL)) == MACH_MSG_SUCCESS)
441 {
442 LIBC_CANCEL_RESET (cancel_oldtype);
443
444 /* We got a message. Decode it. */
445#ifdef MACH_MSG_TYPE_BIT
446 const union typeword inttype =
447 { type:
448 { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 }
449 };
450#endif
451
452 if (sigport != MACH_PORT_NULL && sigport == msg.head.msgh_local_port)
453 {
454 /* We actually got interrupted by a signal before
455 __mach_msg; poll for further responses and then
456 return quickly. */
457 err = EINTR;
458 goto poll;
459 }
460
461 if (msg.head.msgh_id == reply_msgid
462 && msg.head.msgh_size >= sizeof msg.error
463 && !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX)
464#ifdef MACH_MSG_TYPE_BIT
465 && msg.error.err_type.word == inttype.word
466#endif
467 )
468 {
469 /* This is a properly formatted message so far.
470 See if it is a success or a failure. */
471 if (msg.error.err == EINTR
472 && msg.head.msgh_size == sizeof msg.error)
473 {
474 /* EINTR response; poll for further responses
475 and then return quickly. */
476 err = EINTR;
477 goto poll;
478 }
479 /* Keep in mind msg.success.result can be 0 if a timeout
480 occurred. */
481 if (msg.error.err
482#ifdef MACH_MSG_TYPE_BIT
483 || msg.success.result_type.word != inttype.word
484#endif
485 || msg.head.msgh_size != sizeof msg.success)
486 {
487 /* Error or bogus reply. */
488 if (!msg.error.err)
489 msg.error.err = EIO;
490 __mach_msg_destroy (&msg.head);
491 }
492
493 /* Look up the respondent's reply port and record its
494 readiness. */
495 {
496 int had = got;
497 if (firstfd != -1)
498 for (i = firstfd; i <= lastfd; ++i)
499 if (d[i].type
500 && d[i].reply_port == msg.head.msgh_local_port)
501 {
502 if (msg.error.err)
503 {
504 d[i].error = msg.error.err;
505 d[i].type = SELECT_ERROR;
506 ++ready;
507 }
508 else
509 {
510 d[i].type &= msg.success.result;
511 if (d[i].type)
512 ++ready;
513 }
514
515 d[i].type |= SELECT_RETURNED;
516 ++got;
517 }
518 assert (got > had);
519 }
520 }
521
522 if (msg.head.msgh_remote_port != MACH_PORT_NULL)
523 __mach_port_deallocate (__mach_task_self (),
524 msg.head.msgh_remote_port);
525
526 if (got)
527 poll:
528 {
529 /* Poll for another message. */
530 to = 0;
531 options |= MACH_RCV_TIMEOUT;
532 }
533 }
534 LIBC_CANCEL_RESET (cancel_oldtype);
535
536 if (msgerr == MACH_RCV_INTERRUPTED)
537 /* Interruption on our side (e.g. signal reception). */
538 err = EINTR;
539
540 if (ready)
541 /* At least one descriptor is known to be ready now, so we will
542 return success. */
543 err = 0;
544 }
545
546 if (firstfd != -1)
547 for (i = firstfd; i <= lastfd; ++i)
548 if (d[i].reply_port != MACH_PORT_NULL)
549 __mach_port_destroy (__mach_task_self (), d[i].reply_port);
550
551 if (sigport != MACH_PORT_NULL)
552 {
553 _hurd_sigstate_lock (ss);
554 ss->suspended = MACH_PORT_NULL;
555 _hurd_sigstate_unlock (ss);
556 __mach_port_destroy (__mach_task_self (), sigport);
557 }
558
559 if ((firstfd == -1 && sigport == MACH_PORT_NULL)
560 || ((firstfd != lastfd || sigport != MACH_PORT_NULL) && portset != MACH_PORT_NULL))
561 /* Destroy PORTSET, but only if it's not actually the reply port for a
562 single descriptor (in which case it's destroyed in the previous loop;
563 not doing it here is just a bit more efficient). */
564 __mach_port_destroy (__mach_task_self (), portset);
565
566 if (err)
567 {
568 if (sigmask)
569 __sigprocmask (SIG_SETMASK, set: &oset, NULL);
570 return __hurd_fail (err);
571 }
572
573 if (pollfds)
574 /* Fill in the `revents' members of the user's array. */
575 for (i = 0; i < nfds; ++i)
576 {
577 int type = d[i].type;
578 int_fast16_t revents = 0;
579
580 if (type & SELECT_ERROR)
581 switch (d[i].error)
582 {
583 case EPIPE:
584 revents = POLLHUP;
585 break;
586 case EBADF:
587 revents = POLLNVAL;
588 break;
589 default:
590 revents = POLLERR;
591 break;
592 }
593 else
594 if (type & SELECT_RETURNED)
595 {
596 if (type & SELECT_READ)
597 revents |= POLLIN;
598 if (type & SELECT_WRITE)
599 revents |= POLLOUT;
600 if (type & SELECT_URG)
601 revents |= POLLPRI;
602 }
603
604 pollfds[i].revents = revents;
605 }
606 else
607 {
608 /* Below we recalculate READY to include an increment for each operation
609 allowed on each fd. */
610 ready = 0;
611
612 /* Set the user bitarrays. We only ever have to clear bits, as all
613 desired ones are initially set. */
614 if (firstfd != -1)
615 for (i = firstfd; i <= lastfd; ++i)
616 {
617 int type = d[i].type;
618
619 if ((type & SELECT_RETURNED) == 0)
620 type = 0;
621
622 /* Callers of select don't expect to see errors, so we simulate
623 readiness of the erring object and the next call hopefully
624 will get the error again. */
625 if (type & SELECT_ERROR)
626 {
627 type = 0;
628 if (readfds != NULL && FD_ISSET (i, readfds))
629 type |= SELECT_READ;
630 if (writefds != NULL && FD_ISSET (i, writefds))
631 type |= SELECT_WRITE;
632 if (exceptfds != NULL && FD_ISSET (i, exceptfds))
633 type |= SELECT_URG;
634 }
635
636 if (type & SELECT_READ)
637 ready++;
638 else if (readfds)
639 FD_CLR (i, readfds);
640 if (type & SELECT_WRITE)
641 ready++;
642 else if (writefds)
643 FD_CLR (i, writefds);
644 if (type & SELECT_URG)
645 ready++;
646 else if (exceptfds)
647 FD_CLR (i, exceptfds);
648 }
649 }
650
651 if (sigmask && __sigprocmask (SIG_SETMASK, set: &oset, NULL))
652 return -1;
653
654 return ready;
655}
656

source code of glibc/hurd/hurdselect.c