use crate::{engine::Engine, DecodeError, PAD_BYTE};
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
///
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    // Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk into, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    // into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; DECODED_CHUNK_SIZE],
    // index of start of decoded data
    decoded_offset: usize,
    // length of decoded data
    decoded_len: usize,
    // used to provide accurate offsets in errors
    total_b64_decoded: usize,
    // offset of previously seen padding, if any
    padding_offset: Option<usize>,
}
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_buffer", &self.decoded_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("total_b64_decoded", &self.total_b64_decoded)
            .field("padding_offset", &self.padding_offset)
            .finish()
    }
}

impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read from the provided reader `r`.
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            total_b64_decoded: 0,
            padding_offset: None,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= b64_len_to_decode);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
        let decode_metadata = self
            .engine
            .internal_decode(
                b64_to_decode,
                buf,
                self.engine.internal_decoded_len_estimate(b64_len_to_decode),
            )
            .map_err(|e| match e {
                DecodeError::InvalidByte(offset, byte) => {
                    // This can be incorrect, but not in a way that probably matters to anyone:
                    // if there was padding handled in a previous decode, and we are now getting
                    // InvalidByte due to more padding, we should arguably report InvalidByte with
                    // PAD_BYTE at the original padding position (`self.padding_offset`), but we
                    // don't have a good way to tie those two cases together, so instead we
                    // just report the invalid byte as if the previous padding, and its possibly
                    // related downgrade to a now invalid byte, didn't happen.
                    DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidLength => DecodeError::InvalidLength,
                DecodeError::InvalidLastSymbol(offset, byte) => {
                    DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
                }
                DecodeError::InvalidPadding => DecodeError::InvalidPadding,
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        if let Some(offset) = self.padding_offset {
            // we've already seen padding
            if decode_metadata.decoded_len > 0 {
                // we read more after already finding padding; report error at first padding byte
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    DecodeError::InvalidByte(offset, PAD_BYTE),
                ));
            }
        }

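        // Remember the first padding byte we have seen, translated to an absolute offset in the
        // overall b64 stream, so that any later decode that still produces data can be rejected
        // with an error pointing at that padding position (see the check above).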
        self.padding_offset = self.padding_offset.or(decode_metadata
            .padding_offset
            .map(|offset| self.total_b64_decoded + offset));
        self.total_b64_decoded += b64_len_to_decode;
        self.b64_offset += b64_len_to_decode;
        self.b64_len -= b64_len_to_decode;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decode_metadata.decoded_len)
    }

    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
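    ///
    /// # Examples
    ///
    /// A minimal sketch of recovering the wrapped reader once decoding is done; the `Cursor` is
    /// just a stand-in for whatever reader was originally wrapped.
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
    /// let mut decoder = base64::read::DecoderReader::new(
    ///     &mut wrapped_reader,
    ///     &general_purpose::STANDARD);
    ///
    /// let mut result = Vec::new();
    /// decoder.read_to_end(&mut result).unwrap();
    ///
    /// // the decoder can now be discarded and the inner reader reused
    /// let inner = decoder.into_inner();
    /// assert_eq!(8, inner.position());
    /// ```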
    pub fn into_inner(self) -> R {
        self.inner
    }
}

impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written in `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of read() calls to the
    /// delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
        // byte into the provided buf, so the effective length should only be 3 momentarily between
        // when we decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush that rather than pull in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
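            // Top up the b64 buffer until it holds at least one complete 4-byte chunk, or the
            // delegate reports EOF.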
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Copy any bytes we have to the start of the buffer.
                self.b64_buffer
                    .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never read into an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            };

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
                // tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on last block due to padding
                debug_assert!(decoded <= 3);

                self.flush_decoded_buf(buf)
            } else {
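                // Work out how much base64 can be decoded straight into `buf`: each whole
                // DECODED_CHUNK_SIZE (3-byte) slice of `buf` can absorb one BASE64_CHUNK_SIZE
                // (4-byte) chunk of input.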
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % 4
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}