| 1 | //! An encapsulation of `BufReader`'s buffer management logic. |
| 2 | //! |
| 3 | //! This module factors out the basic functionality of `BufReader` in order to protect two core |
| 4 | //! invariants: |
| 5 | //! * `filled` bytes of `buf` are always initialized |
| 6 | //! * `pos` is always <= `filled` |
//!
//! Since this module encapsulates the buffer management logic, we can ensure that the range
| 8 | //! `pos..filled` is always a valid index into the initialized region of the buffer. This means |
| 9 | //! that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so |
| 10 | //! without encountering any runtime bounds checks. |
| 11 | |
| 12 | use crate::cmp; |
| 13 | use crate::io::{self, BorrowedBuf, ErrorKind, Read}; |
| 14 | use crate::mem::MaybeUninit; |
| 15 | |
/// Storage plus cursor bookkeeping backing `BufReader`.
pub struct Buffer {
    // Backing storage. Its bytes are `MaybeUninit` because not all of them have necessarily been
    // written yet.
    buf: Box<[MaybeUninit<u8>]>,
    // Read cursor: the current seek offset into `buf`. Invariant: `pos <= filled`.
    pos: usize,
    // Number of bytes at the front of `buf` that hold data from a read. Every call to `fill_buf`
    // updates this.
    filled: usize,
    // High-water mark: the largest number of bytes returned across all `fill_buf` calls. It is
    // tracked so that `read_buf` can be told exactly how much of `buf` is already initialized and
    // can skip as much of its defensive initialization as possible. While this is often the same
    // as `filled`, it does not have to be: `fill_buf` is not required to actually fill the
    // buffer, and dropping this field is a huge perf regression for `Read` impls that do not.
    initialized: usize,
}
| 31 | |
| 32 | impl Buffer { |
| 33 | #[inline ] |
| 34 | pub fn with_capacity(capacity: usize) -> Self { |
| 35 | let buf = Box::new_uninit_slice(capacity); |
| 36 | Self { buf, pos: 0, filled: 0, initialized: 0 } |
| 37 | } |
| 38 | |
| 39 | #[inline ] |
| 40 | pub fn try_with_capacity(capacity: usize) -> io::Result<Self> { |
| 41 | match Box::try_new_uninit_slice(capacity) { |
| 42 | Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: 0 }), |
| 43 | Err(_) => { |
| 44 | Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer" )) |
| 45 | } |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | #[inline ] |
| 50 | pub fn buffer(&self) -> &[u8] { |
| 51 | // SAFETY: self.pos and self.cap are valid, and self.cap => self.pos, and |
| 52 | // that region is initialized because those are all invariants of this type. |
| 53 | unsafe { self.buf.get_unchecked(self.pos..self.filled).assume_init_ref() } |
| 54 | } |
| 55 | |
| 56 | #[inline ] |
| 57 | pub fn capacity(&self) -> usize { |
| 58 | self.buf.len() |
| 59 | } |
| 60 | |
| 61 | #[inline ] |
| 62 | pub fn filled(&self) -> usize { |
| 63 | self.filled |
| 64 | } |
| 65 | |
| 66 | #[inline ] |
| 67 | pub fn pos(&self) -> usize { |
| 68 | self.pos |
| 69 | } |
| 70 | |
| 71 | // This is only used by a test which asserts that the initialization-tracking is correct. |
| 72 | #[cfg (test)] |
| 73 | pub fn initialized(&self) -> usize { |
| 74 | self.initialized |
| 75 | } |
| 76 | |
| 77 | #[inline ] |
| 78 | pub fn discard_buffer(&mut self) { |
| 79 | self.pos = 0; |
| 80 | self.filled = 0; |
| 81 | } |
| 82 | |
| 83 | #[inline ] |
| 84 | pub fn consume(&mut self, amt: usize) { |
| 85 | self.pos = cmp::min(self.pos + amt, self.filled); |
| 86 | } |
| 87 | |
| 88 | /// If there are `amt` bytes available in the buffer, pass a slice containing those bytes to |
| 89 | /// `visitor` and return true. If there are not enough bytes available, return false. |
| 90 | #[inline ] |
| 91 | pub fn consume_with<V>(&mut self, amt: usize, mut visitor: V) -> bool |
| 92 | where |
| 93 | V: FnMut(&[u8]), |
| 94 | { |
| 95 | if let Some(claimed) = self.buffer().get(..amt) { |
| 96 | visitor(claimed); |
| 97 | // If the indexing into self.buffer() succeeds, amt must be a valid increment. |
| 98 | self.pos += amt; |
| 99 | true |
| 100 | } else { |
| 101 | false |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | #[inline ] |
| 106 | pub fn unconsume(&mut self, amt: usize) { |
| 107 | self.pos = self.pos.saturating_sub(amt); |
| 108 | } |
| 109 | |
| 110 | /// Read more bytes into the buffer without discarding any of its contents |
| 111 | pub fn read_more(&mut self, mut reader: impl Read) -> io::Result<usize> { |
| 112 | let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]); |
| 113 | let old_init = self.initialized - self.filled; |
| 114 | unsafe { |
| 115 | buf.set_init(old_init); |
| 116 | } |
| 117 | reader.read_buf(buf.unfilled())?; |
| 118 | self.filled += buf.len(); |
| 119 | self.initialized += buf.init_len() - old_init; |
| 120 | Ok(buf.len()) |
| 121 | } |
| 122 | |
| 123 | /// Remove bytes that have already been read from the buffer. |
| 124 | pub fn backshift(&mut self) { |
| 125 | self.buf.copy_within(self.pos.., 0); |
| 126 | self.filled -= self.pos; |
| 127 | self.pos = 0; |
| 128 | } |
| 129 | |
| 130 | #[inline ] |
| 131 | pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> { |
| 132 | // If we've reached the end of our internal buffer then we need to fetch |
| 133 | // some more data from the reader. |
| 134 | // Branch using `>=` instead of the more correct `==` |
| 135 | // to tell the compiler that the pos..cap slice is always valid. |
| 136 | if self.pos >= self.filled { |
| 137 | debug_assert!(self.pos == self.filled); |
| 138 | |
| 139 | let mut buf = BorrowedBuf::from(&mut *self.buf); |
| 140 | // SAFETY: `self.filled` bytes will always have been initialized. |
| 141 | unsafe { |
| 142 | buf.set_init(self.initialized); |
| 143 | } |
| 144 | |
| 145 | let result = reader.read_buf(buf.unfilled()); |
| 146 | |
| 147 | self.pos = 0; |
| 148 | self.filled = buf.len(); |
| 149 | self.initialized = buf.init_len(); |
| 150 | |
| 151 | result?; |
| 152 | } |
| 153 | Ok(self.buffer()) |
| 154 | } |
| 155 | } |
| 156 | |