1 | //! An encapsulation of `BufReader`'s buffer management logic. |
2 | //! |
3 | //! This module factors out the basic functionality of `BufReader` in order to protect two core |
4 | //! invariants: |
5 | //! * `filled` bytes of `buf` are always initialized |
6 | //! * `pos` is always <= `filled` |
7 | //! Since this module encapsulates the buffer management logic, we can ensure that the range |
8 | //! `pos..filled` is always a valid index into the initialized region of the buffer. This means |
9 | //! that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so |
10 | //! without encountering any runtime bounds checks. |
11 | |
12 | use crate::cmp; |
13 | use crate::io::{self, BorrowedBuf, ErrorKind, Read}; |
14 | use crate::mem::MaybeUninit; |
15 | |
/// Backing storage plus the cursor/bookkeeping state used by the methods below.
pub struct Buffer {
    // The buffer.
    buf: Box<[MaybeUninit<u8>]>,
    // The current seek offset into `buf`, must always be <= `filled`.
    pos: usize,
    // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are
    // initialized with bytes from a read.
    filled: usize,
    // This is the max number of bytes returned across all `fill_buf` calls. We track this so that
    // we can accurately tell `read_buf` how many bytes of buf are initialized, to bypass as much
    // of its defensive initialization as possible. Note that while this is often the same as
    // `filled`, it doesn't need to be. Calls to `fill_buf` are not required to actually fill the
    // buffer, and omitting this is a huge perf regression for `Read` impls that do not.
    initialized: usize,
}
31 | |
32 | impl Buffer { |
33 | #[inline ] |
34 | pub fn with_capacity(capacity: usize) -> Self { |
35 | let buf = Box::new_uninit_slice(capacity); |
36 | Self { buf, pos: 0, filled: 0, initialized: 0 } |
37 | } |
38 | |
39 | #[inline ] |
40 | pub fn try_with_capacity(capacity: usize) -> io::Result<Self> { |
41 | match Box::try_new_uninit_slice(capacity) { |
42 | Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: 0 }), |
43 | Err(_) => { |
44 | Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer" )) |
45 | } |
46 | } |
47 | } |
48 | |
49 | #[inline ] |
50 | pub fn buffer(&self) -> &[u8] { |
51 | // SAFETY: self.pos and self.cap are valid, and self.cap => self.pos, and |
52 | // that region is initialized because those are all invariants of this type. |
53 | unsafe { self.buf.get_unchecked(self.pos..self.filled).assume_init_ref() } |
54 | } |
55 | |
56 | #[inline ] |
57 | pub fn capacity(&self) -> usize { |
58 | self.buf.len() |
59 | } |
60 | |
61 | #[inline ] |
62 | pub fn filled(&self) -> usize { |
63 | self.filled |
64 | } |
65 | |
66 | #[inline ] |
67 | pub fn pos(&self) -> usize { |
68 | self.pos |
69 | } |
70 | |
// Test-only accessor: used by a test which asserts that the initialization tracking is correct.
#[cfg(test)]
pub fn initialized(&self) -> usize {
    let Self { initialized, .. } = *self;
    initialized
}
76 | |
77 | #[inline ] |
78 | pub fn discard_buffer(&mut self) { |
79 | self.pos = 0; |
80 | self.filled = 0; |
81 | } |
82 | |
83 | #[inline ] |
84 | pub fn consume(&mut self, amt: usize) { |
85 | self.pos = cmp::min(self.pos + amt, self.filled); |
86 | } |
87 | |
88 | /// If there are `amt` bytes available in the buffer, pass a slice containing those bytes to |
89 | /// `visitor` and return true. If there are not enough bytes available, return false. |
90 | #[inline ] |
91 | pub fn consume_with<V>(&mut self, amt: usize, mut visitor: V) -> bool |
92 | where |
93 | V: FnMut(&[u8]), |
94 | { |
95 | if let Some(claimed) = self.buffer().get(..amt) { |
96 | visitor(claimed); |
97 | // If the indexing into self.buffer() succeeds, amt must be a valid increment. |
98 | self.pos += amt; |
99 | true |
100 | } else { |
101 | false |
102 | } |
103 | } |
104 | |
105 | #[inline ] |
106 | pub fn unconsume(&mut self, amt: usize) { |
107 | self.pos = self.pos.saturating_sub(amt); |
108 | } |
109 | |
110 | /// Read more bytes into the buffer without discarding any of its contents |
111 | pub fn read_more(&mut self, mut reader: impl Read) -> io::Result<usize> { |
112 | let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]); |
113 | let old_init = self.initialized - self.filled; |
114 | unsafe { |
115 | buf.set_init(old_init); |
116 | } |
117 | reader.read_buf(buf.unfilled())?; |
118 | self.filled += buf.len(); |
119 | self.initialized += buf.init_len() - old_init; |
120 | Ok(buf.len()) |
121 | } |
122 | |
123 | /// Remove bytes that have already been read from the buffer. |
124 | pub fn backshift(&mut self) { |
125 | self.buf.copy_within(self.pos.., 0); |
126 | self.filled -= self.pos; |
127 | self.pos = 0; |
128 | } |
129 | |
130 | #[inline ] |
131 | pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> { |
132 | // If we've reached the end of our internal buffer then we need to fetch |
133 | // some more data from the reader. |
134 | // Branch using `>=` instead of the more correct `==` |
135 | // to tell the compiler that the pos..cap slice is always valid. |
136 | if self.pos >= self.filled { |
137 | debug_assert!(self.pos == self.filled); |
138 | |
139 | let mut buf = BorrowedBuf::from(&mut *self.buf); |
140 | // SAFETY: `self.filled` bytes will always have been initialized. |
141 | unsafe { |
142 | buf.set_init(self.initialized); |
143 | } |
144 | |
145 | let result = reader.read_buf(buf.unfilled()); |
146 | |
147 | self.pos = 0; |
148 | self.filled = buf.len(); |
149 | self.initialized = buf.init_len(); |
150 | |
151 | result?; |
152 | } |
153 | Ok(self.buffer()) |
154 | } |
155 | } |
156 | |