1 | //! Bindings to epoll (Linux, Android). |
2 | |
3 | use std::io; |
4 | use std::os::unix::io::{AsFd, AsRawFd, BorrowedFd, RawFd}; |
5 | use std::time::Duration; |
6 | |
7 | #[cfg (not(target_os = "redox" ))] |
8 | use rustix::event::{eventfd, EventfdFlags}; |
9 | #[cfg (not(target_os = "redox" ))] |
10 | use rustix::time::{ |
11 | timerfd_create, timerfd_settime, Itimerspec, TimerfdClockId, TimerfdFlags, TimerfdTimerFlags, |
12 | Timespec, |
13 | }; |
14 | |
15 | use rustix::event::epoll; |
16 | use rustix::fd::OwnedFd; |
17 | use rustix::fs::{fcntl_getfl, fcntl_setfl, OFlags}; |
18 | use rustix::io::{fcntl_getfd, fcntl_setfd, read, write, FdFlags}; |
19 | use rustix::pipe::{pipe, pipe_with, PipeFlags}; |
20 | |
21 | use crate::{Event, PollMode}; |
22 | |
23 | /// Interface to epoll. |
24 | #[derive (Debug)] |
25 | pub struct Poller { |
26 | /// File descriptor for the epoll instance. |
27 | epoll_fd: OwnedFd, |
28 | |
29 | /// Notifier used to wake up epoll. |
30 | notifier: Notifier, |
31 | |
32 | /// File descriptor for the timerfd that produces timeouts. |
33 | /// |
34 | /// Redox does not support timerfd. |
35 | #[cfg (not(target_os = "redox" ))] |
36 | timer_fd: Option<OwnedFd>, |
37 | } |
38 | |
39 | impl Poller { |
40 | /// Creates a new poller. |
41 | pub fn new() -> io::Result<Poller> { |
42 | // Create an epoll instance. |
43 | // |
44 | // Use `epoll_create1` with `EPOLL_CLOEXEC`. |
45 | let epoll_fd = epoll::create(epoll::CreateFlags::CLOEXEC)?; |
46 | |
47 | // Set up notifier and timerfd. |
48 | let notifier = Notifier::new()?; |
49 | #[cfg (not(target_os = "redox" ))] |
50 | let timer_fd = timerfd_create( |
51 | TimerfdClockId::Monotonic, |
52 | TimerfdFlags::CLOEXEC | TimerfdFlags::NONBLOCK, |
53 | ) |
54 | .ok(); |
55 | |
56 | let poller = Poller { |
57 | epoll_fd, |
58 | notifier, |
59 | #[cfg (not(target_os = "redox" ))] |
60 | timer_fd, |
61 | }; |
62 | |
63 | unsafe { |
64 | #[cfg (not(target_os = "redox" ))] |
65 | if let Some(ref timer_fd) = poller.timer_fd { |
66 | poller.add( |
67 | timer_fd.as_raw_fd(), |
68 | Event::none(crate::NOTIFY_KEY), |
69 | PollMode::Oneshot, |
70 | )?; |
71 | } |
72 | |
73 | poller.add( |
74 | poller.notifier.as_fd().as_raw_fd(), |
75 | Event::readable(crate::NOTIFY_KEY), |
76 | PollMode::Oneshot, |
77 | )?; |
78 | } |
79 | |
80 | tracing::trace!( |
81 | epoll_fd = ?poller.epoll_fd.as_raw_fd(), |
82 | notifier = ?poller.notifier, |
83 | "new" , |
84 | ); |
85 | Ok(poller) |
86 | } |
87 | |
88 | /// Whether this poller supports level-triggered events. |
89 | pub fn supports_level(&self) -> bool { |
90 | true |
91 | } |
92 | |
93 | /// Whether the poller supports edge-triggered events. |
94 | pub fn supports_edge(&self) -> bool { |
95 | true |
96 | } |
97 | |
98 | /// Adds a new file descriptor. |
99 | /// |
100 | /// # Safety |
101 | /// |
102 | /// The `fd` must be a valid file descriptor. The usual condition of remaining registered in |
103 | /// the `Poller` doesn't apply to `epoll`. |
104 | pub unsafe fn add(&self, fd: RawFd, ev: Event, mode: PollMode) -> io::Result<()> { |
105 | let span = tracing::trace_span!( |
106 | "add" , |
107 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
108 | ?fd, |
109 | ?ev, |
110 | ); |
111 | let _enter = span.enter(); |
112 | |
113 | epoll::add( |
114 | &self.epoll_fd, |
115 | unsafe { rustix::fd::BorrowedFd::borrow_raw(fd) }, |
116 | epoll::EventData::new_u64(ev.key as u64), |
117 | epoll_flags(&ev, mode) | ev.extra.flags, |
118 | )?; |
119 | |
120 | Ok(()) |
121 | } |
122 | |
123 | /// Modifies an existing file descriptor. |
124 | pub fn modify(&self, fd: BorrowedFd<'_>, ev: Event, mode: PollMode) -> io::Result<()> { |
125 | let span = tracing::trace_span!( |
126 | "modify" , |
127 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
128 | ?fd, |
129 | ?ev, |
130 | ); |
131 | let _enter = span.enter(); |
132 | |
133 | epoll::modify( |
134 | &self.epoll_fd, |
135 | fd, |
136 | epoll::EventData::new_u64(ev.key as u64), |
137 | epoll_flags(&ev, mode) | ev.extra.flags, |
138 | )?; |
139 | |
140 | Ok(()) |
141 | } |
142 | |
143 | /// Deletes a file descriptor. |
144 | pub fn delete(&self, fd: BorrowedFd<'_>) -> io::Result<()> { |
145 | let span = tracing::trace_span!( |
146 | "delete" , |
147 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
148 | ?fd, |
149 | ); |
150 | let _enter = span.enter(); |
151 | |
152 | epoll::delete(&self.epoll_fd, fd)?; |
153 | |
154 | Ok(()) |
155 | } |
156 | |
157 | /// Waits for I/O events with an optional timeout. |
158 | #[allow (clippy::needless_update)] |
159 | pub fn wait(&self, events: &mut Events, timeout: Option<Duration>) -> io::Result<()> { |
160 | let span = tracing::trace_span!( |
161 | "wait" , |
162 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
163 | ?timeout, |
164 | ); |
165 | let _enter = span.enter(); |
166 | |
167 | #[cfg (not(target_os = "redox" ))] |
168 | if let Some(ref timer_fd) = self.timer_fd { |
169 | // Configure the timeout using timerfd. |
170 | let new_val = Itimerspec { |
171 | it_interval: TS_ZERO, |
172 | it_value: match timeout { |
173 | None => TS_ZERO, |
174 | Some(t) => { |
175 | let mut ts = TS_ZERO; |
176 | ts.tv_sec = t.as_secs() as _; |
177 | ts.tv_nsec = t.subsec_nanos() as _; |
178 | ts |
179 | } |
180 | }, |
181 | ..unsafe { std::mem::zeroed() } |
182 | }; |
183 | |
184 | timerfd_settime(timer_fd, TimerfdTimerFlags::empty(), &new_val)?; |
185 | |
186 | // Set interest in timerfd. |
187 | self.modify( |
188 | timer_fd.as_fd(), |
189 | Event::readable(crate::NOTIFY_KEY), |
190 | PollMode::Oneshot, |
191 | )?; |
192 | } |
193 | |
194 | #[cfg (not(target_os = "redox" ))] |
195 | let timer_fd = &self.timer_fd; |
196 | #[cfg (target_os = "redox" )] |
197 | let timer_fd: Option<core::convert::Infallible> = None; |
198 | |
199 | // Timeout in milliseconds for epoll. |
200 | let timeout_ms = match (timer_fd, timeout) { |
201 | (_, Some(t)) if t == Duration::from_secs(0) => 0, |
202 | (None, Some(t)) => { |
203 | // Round up to a whole millisecond. |
204 | let mut ms = t.as_millis().try_into().unwrap_or(std::i32::MAX); |
205 | if Duration::from_millis(ms as u64) < t { |
206 | ms = ms.saturating_add(1); |
207 | } |
208 | ms |
209 | } |
210 | _ => -1, |
211 | }; |
212 | |
213 | // Wait for I/O events. |
214 | epoll::wait(&self.epoll_fd, &mut events.list, timeout_ms)?; |
215 | tracing::trace!( |
216 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
217 | res = ?events.list.len(), |
218 | "new events" , |
219 | ); |
220 | |
221 | // Clear the notification (if received) and re-register interest in it. |
222 | self.notifier.clear(); |
223 | self.modify( |
224 | self.notifier.as_fd(), |
225 | Event::readable(crate::NOTIFY_KEY), |
226 | PollMode::Oneshot, |
227 | )?; |
228 | Ok(()) |
229 | } |
230 | |
231 | /// Sends a notification to wake up the current or next `wait()` call. |
232 | pub fn notify(&self) -> io::Result<()> { |
233 | let span = tracing::trace_span!( |
234 | "notify" , |
235 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
236 | notifier = ?self.notifier, |
237 | ); |
238 | let _enter = span.enter(); |
239 | |
240 | self.notifier.notify(); |
241 | Ok(()) |
242 | } |
243 | } |
244 | |
245 | impl AsRawFd for Poller { |
246 | fn as_raw_fd(&self) -> RawFd { |
247 | self.epoll_fd.as_raw_fd() |
248 | } |
249 | } |
250 | |
251 | impl AsFd for Poller { |
252 | fn as_fd(&self) -> BorrowedFd<'_> { |
253 | self.epoll_fd.as_fd() |
254 | } |
255 | } |
256 | |
257 | impl Drop for Poller { |
258 | fn drop(&mut self) { |
259 | let span: Span = tracing::trace_span!( |
260 | "drop" , |
261 | epoll_fd = ?self.epoll_fd.as_raw_fd(), |
262 | notifier = ?self.notifier, |
263 | ); |
264 | let _enter: Entered<'_> = span.enter(); |
265 | |
266 | #[cfg (not(target_os = "redox" ))] |
267 | if let Some(timer_fd: OwnedFd) = self.timer_fd.take() { |
268 | let _ = self.delete(timer_fd.as_fd()); |
269 | } |
270 | let _ = self.delete(self.notifier.as_fd()); |
271 | } |
272 | } |
273 | |
274 | /// `timespec` value that equals zero. |
275 | #[cfg (not(target_os = "redox" ))] |
276 | const TS_ZERO: Timespec = unsafe { std::mem::transmute([0u8; std::mem::size_of::<Timespec>()]) }; |
277 | |
278 | /// Get the EPOLL flags for the interest. |
279 | fn epoll_flags(interest: &Event, mode: PollMode) -> epoll::EventFlags { |
280 | let mut flags: EventFlags = match mode { |
281 | PollMode::Oneshot => epoll::EventFlags::ONESHOT, |
282 | PollMode::Level => epoll::EventFlags::empty(), |
283 | PollMode::Edge => epoll::EventFlags::ET, |
284 | PollMode::EdgeOneshot => epoll::EventFlags::ET | epoll::EventFlags::ONESHOT, |
285 | }; |
286 | if interest.readable { |
287 | flags |= read_flags(); |
288 | } |
289 | if interest.writable { |
290 | flags |= write_flags(); |
291 | } |
292 | flags |
293 | } |
294 | |
295 | /// Epoll flags for all possible readability events. |
296 | fn read_flags() -> epoll::EventFlags { |
297 | use epoll::EventFlags as Epoll; |
298 | Epoll::IN | Epoll::HUP | Epoll::ERR | Epoll::PRI |
299 | } |
300 | |
301 | /// Epoll flags for all possible writability events. |
302 | fn write_flags() -> epoll::EventFlags { |
303 | use epoll::EventFlags as Epoll; |
304 | Epoll::OUT | Epoll::HUP | Epoll::ERR |
305 | } |
306 | |
307 | /// A list of reported I/O events. |
308 | pub struct Events { |
309 | list: epoll::EventVec, |
310 | } |
311 | |
312 | unsafe impl Send for Events {} |
313 | |
314 | impl Events { |
315 | /// Creates an empty list. |
316 | pub fn with_capacity(cap: usize) -> Events { |
317 | Events { |
318 | list: epoll::EventVec::with_capacity(cap), |
319 | } |
320 | } |
321 | |
322 | /// Iterates over I/O events. |
323 | pub fn iter(&self) -> impl Iterator<Item = Event> + '_ { |
324 | self.list.iter().map(|ev| { |
325 | let flags = ev.flags; |
326 | Event { |
327 | key: ev.data.u64() as usize, |
328 | readable: flags.intersects(read_flags()), |
329 | writable: flags.intersects(write_flags()), |
330 | extra: EventExtra { flags }, |
331 | } |
332 | }) |
333 | } |
334 | |
335 | /// Clear the list. |
336 | pub fn clear(&mut self) { |
337 | self.list.clear(); |
338 | } |
339 | |
340 | /// Get the capacity of the list. |
341 | pub fn capacity(&self) -> usize { |
342 | self.list.capacity() |
343 | } |
344 | } |
345 | |
346 | /// Extra information about this event. |
347 | #[derive (Debug, Clone, Copy, PartialEq, Eq)] |
348 | pub struct EventExtra { |
349 | flags: epoll::EventFlags, |
350 | } |
351 | |
352 | impl EventExtra { |
353 | /// Create an empty version of the data. |
354 | #[inline ] |
355 | pub const fn empty() -> EventExtra { |
356 | EventExtra { |
357 | flags: epoll::EventFlags::empty(), |
358 | } |
359 | } |
360 | |
361 | /// Add the interrupt flag to this event. |
362 | #[inline ] |
363 | pub fn set_hup(&mut self, active: bool) { |
364 | self.flags.set(epoll::EventFlags::HUP, active); |
365 | } |
366 | |
367 | /// Add the priority flag to this event. |
368 | #[inline ] |
369 | pub fn set_pri(&mut self, active: bool) { |
370 | self.flags.set(epoll::EventFlags::PRI, active); |
371 | } |
372 | |
373 | /// Tell if the interrupt flag is set. |
374 | #[inline ] |
375 | pub fn is_hup(&self) -> bool { |
376 | self.flags.contains(epoll::EventFlags::HUP) |
377 | } |
378 | |
379 | /// Tell if the priority flag is set. |
380 | #[inline ] |
381 | pub fn is_pri(&self) -> bool { |
382 | self.flags.contains(epoll::EventFlags::PRI) |
383 | } |
384 | |
385 | #[inline ] |
386 | pub fn is_connect_failed(&self) -> Option<bool> { |
387 | Some( |
388 | self.flags.contains(epoll::EventFlags::ERR) |
389 | || self.flags.contains(epoll::EventFlags::HUP), |
390 | ) |
391 | } |
392 | } |
393 | |
394 | /// The notifier for Linux. |
395 | /// |
396 | /// Certain container runtimes do not expose eventfd to the client, as it relies on the host and |
397 | /// can be used to "escape" the container under certain conditions. Gramine is the prime example, |
398 | /// see [here](gramine). In this case, fall back to using a pipe. |
399 | /// |
400 | /// [gramine]: https://gramine.readthedocs.io/en/stable/manifest-syntax.html#allowing-eventfd |
401 | #[derive (Debug)] |
402 | enum Notifier { |
403 | /// The primary notifier, using eventfd. |
404 | #[cfg (not(target_os = "redox" ))] |
405 | EventFd(OwnedFd), |
406 | |
407 | /// The fallback notifier, using a pipe. |
408 | Pipe { |
409 | /// The read end of the pipe. |
410 | read_pipe: OwnedFd, |
411 | |
412 | /// The write end of the pipe. |
413 | write_pipe: OwnedFd, |
414 | }, |
415 | } |
416 | |
417 | impl Notifier { |
418 | /// Create a new notifier. |
419 | fn new() -> io::Result<Self> { |
420 | // Skip eventfd for testing if necessary. |
421 | #[cfg (not(target_os = "redox" ))] |
422 | { |
423 | if !cfg!(polling_test_epoll_pipe) { |
424 | // Try to create an eventfd. |
425 | match eventfd(0, EventfdFlags::CLOEXEC | EventfdFlags::NONBLOCK) { |
426 | Ok(fd) => { |
427 | tracing::trace!("created eventfd for notifier" ); |
428 | return Ok(Notifier::EventFd(fd)); |
429 | } |
430 | |
431 | Err(err) => { |
432 | tracing::warn!( |
433 | "eventfd() failed with error ( {}), falling back to pipe" , |
434 | err |
435 | ); |
436 | } |
437 | } |
438 | } |
439 | } |
440 | |
441 | let (read, write) = pipe_with(PipeFlags::CLOEXEC).or_else(|_| { |
442 | let (read, write) = pipe()?; |
443 | fcntl_setfd(&read, fcntl_getfd(&read)? | FdFlags::CLOEXEC)?; |
444 | fcntl_setfd(&write, fcntl_getfd(&write)? | FdFlags::CLOEXEC)?; |
445 | io::Result::Ok((read, write)) |
446 | })?; |
447 | |
448 | fcntl_setfl(&read, fcntl_getfl(&read)? | OFlags::NONBLOCK)?; |
449 | Ok(Notifier::Pipe { |
450 | read_pipe: read, |
451 | write_pipe: write, |
452 | }) |
453 | } |
454 | |
455 | /// The file descriptor to register in the poller. |
456 | fn as_fd(&self) -> BorrowedFd<'_> { |
457 | match self { |
458 | #[cfg (not(target_os = "redox" ))] |
459 | Notifier::EventFd(fd) => fd.as_fd(), |
460 | Notifier::Pipe { |
461 | read_pipe: read, .. |
462 | } => read.as_fd(), |
463 | } |
464 | } |
465 | |
466 | /// Notify the poller. |
467 | fn notify(&self) { |
468 | match self { |
469 | #[cfg (not(target_os = "redox" ))] |
470 | Self::EventFd(fd) => { |
471 | let buf: [u8; 8] = 1u64.to_ne_bytes(); |
472 | let _ = write(fd, &buf); |
473 | } |
474 | |
475 | Self::Pipe { write_pipe, .. } => { |
476 | write(write_pipe, &[0; 1]).ok(); |
477 | } |
478 | } |
479 | } |
480 | |
481 | /// Clear the notification. |
482 | fn clear(&self) { |
483 | match self { |
484 | #[cfg (not(target_os = "redox" ))] |
485 | Self::EventFd(fd) => { |
486 | let mut buf = [0u8; 8]; |
487 | let _ = read(fd, &mut buf); |
488 | } |
489 | |
490 | Self::Pipe { read_pipe, .. } => while read(read_pipe, &mut [0u8; 1024]).is_ok() {}, |
491 | } |
492 | } |
493 | } |
494 | |