use crate::{engine::Engine, DecodeError, PAD_BYTE};
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
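// For example, the 4 base64 bytes "YXNk" decode to the 3 raw bytes "asd".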
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    // Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk into, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    // decode 3 bytes into, and then juggle copying one byte into the provided read buf and the
    // rest into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; DECODED_CHUNK_SIZE],
    // Index of the start of the decoded data.
    decoded_offset: usize,
    // Length of the decoded data.
    decoded_len: usize,
    // Used to provide accurate offsets in errors.
    total_b64_decoded: usize,
    // Offset of previously seen padding, if any.
    padding_offset: Option<usize>,
}

impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_buffer", &self.decoded_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("total_b64_decoded", &self.total_b64_decoded)
            .field("padding_offset", &self.padding_offset)
            .finish()
    }
}

impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read base64 data from the provided `reader`.
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            total_b64_decoded: 0,
            padding_offset: None,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's
    /// the caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= b64_len_to_decode);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
        let decode_metadata = self
            .engine
            .internal_decode(
                b64_to_decode,
                buf,
                self.engine.internal_decoded_len_estimate(b64_len_to_decode),
            )
            .map_err(|e| match e {
                DecodeError::InvalidByte(offset, byte) => {
                    // This can be incorrect, but not in a way that probably matters to anyone:
                    // if there was padding handled in a previous decode, and we are now getting
                    // InvalidByte due to more padding, we should arguably report InvalidByte with
                    // PAD_BYTE at the original padding position (`self.padding_offset`), but we
                    // don't have a good way to tie those two cases together, so instead we
                    // just report the invalid byte as if the previous padding, and its possibly
                    // related downgrade to a now invalid byte, didn't happen.
                    DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidLength => DecodeError::InvalidLength,
                DecodeError::InvalidLastSymbol(offset, byte) => {
                    DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidPadding => DecodeError::InvalidPadding,
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        if let Some(offset) = self.padding_offset {
            // we've already seen padding
            if decode_metadata.decoded_len > 0 {
                // we read more after already finding padding; report error at first padding byte
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    DecodeError::InvalidByte(offset, PAD_BYTE),
                ));
            }
        }

        self.padding_offset = self.padding_offset.or(decode_metadata
            .padding_offset
            .map(|offset| self.total_b64_decoded + offset));
        self.total_b64_decoded += b64_len_to_decode;
        self.b64_offset += b64_len_to_decode;
        self.b64_len -= b64_len_to_decode;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decode_metadata.decoded_len)
    }

    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
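    ///
    /// # Examples
    ///
    /// A minimal sketch, again reading from a `Cursor` purely for illustration:
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let wrapped_reader = Cursor::new(b"YXNkZg==");
    /// let mut decoder = base64::read::DecoderReader::new(
    ///     wrapped_reader,
    ///     &general_purpose::STANDARD);
    ///
    /// let mut buf = [0_u8; 3];
    /// decoder.read_exact(&mut buf).unwrap();
    /// assert_eq!(b"asd", &buf[..]);
    ///
    /// // Recover the wrapped reader; its read position is unspecified because of
    /// // the decoder's internal buffering.
    /// let _cursor = decoder.into_inner();
    /// ```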
    pub fn into_inner(self) -> R {
        self.inner
    }
}

impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written to `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of `read()` calls to
    /// the delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
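    ///
    /// # Examples
    ///
    /// A minimal sketch of a short read; the internal buffering means the leftover
    /// decoded byte is returned by a later call rather than lost:
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
    /// let mut decoder = base64::read::DecoderReader::new(
    ///     &mut wrapped_reader,
    ///     &general_purpose::STANDARD);
    ///
    /// // A buffer smaller than a decoded chunk (3 bytes) is fine: the chunk is
    /// // decoded internally and handed out across successive reads.
    /// let mut buf = [0_u8; 2];
    /// let n = decoder.read(&mut buf).unwrap();
    /// assert_eq!(b"as", &buf[..n]);
    /// ```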
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into decoded_buffer when we can't immediately write at least
        // one byte into the provided buf, so the effective length should only be 3 momentarily
        // between when we decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush those rather than pulling in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Copy any bytes we have to the start of the buffer.
                self.b64_buffer
                    .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never read into an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            }

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
                // tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on the last block due to padding
                debug_assert!(decoded <= DECODED_CHUNK_SIZE);

                self.flush_decoded_buf(buf)
            } else {
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % BASE64_CHUNK_SIZE
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}