use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE};
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
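// e.g. the 8 b64 bytes "YXNkZg==" form two chunks: "YXNk" decodes to the 3 raw
// bytes "asd", while the padded final chunk "Zg==" decodes to just "f".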

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    /// Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    /// The start of the pending buffered data in `b64_buffer`.
    b64_offset: usize,
    /// The amount of buffered b64 data after `b64_offset` in `b64_buffer`.
    b64_len: usize,
    /// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    /// decoded chunk into, we have to be able to hang on to a few decoded bytes.
    /// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    /// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    /// into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE],
    /// Index of start of decoded data in `decoded_chunk_buffer`
    decoded_offset: usize,
    /// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer`
    decoded_len: usize,
    /// Input length consumed so far.
    /// Used to provide accurate offsets in errors
    input_consumed_len: usize,
    /// offset of previously seen padding, if any
    padding_offset: Option<usize>,
}

// exclude b64_buffer as it's uselessly large
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_chunk_buffer", &self.decoded_chunk_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("input_consumed_len", &self.input_consumed_len)
            .field("padding_offset", &self.padding_offset)
            .finish()
    }
}

impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read from the provided `reader`.
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            input_consumed_len: 0,
            padding_offset: None,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is free space in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is too small.
    fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= b64_len_to_decode);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
        let decode_metadata = self
            .engine
            .internal_decode(
                b64_to_decode,
                buf,
                self.engine.internal_decoded_len_estimate(b64_len_to_decode),
            )
            .map_err(|dse| match dse {
                DecodeSliceError::DecodeError(de) => match de {
                    DecodeError::InvalidByte(offset, byte) => {
                        match (byte, self.padding_offset) {
                            // if there was padding in a previous block of decoding that happened
                            // to be correct, and we now find more padding that happens to be
                            // incorrect, to be consistent with non-reader decodes, record the
                            // error at the first padding
                            (PAD_BYTE, Some(first_pad_offset)) => {
                                DecodeError::InvalidByte(first_pad_offset, PAD_BYTE)
                            }
                            _ => DecodeError::InvalidByte(self.input_consumed_len + offset, byte),
                        }
                    }
                    DecodeError::InvalidLength(len) => {
                        DecodeError::InvalidLength(self.input_consumed_len + len)
                    }
                    DecodeError::InvalidLastSymbol(offset, byte) => {
                        DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte)
                    }
                    DecodeError::InvalidPadding => DecodeError::InvalidPadding,
                },
                DecodeSliceError::OutputSliceTooSmall => {
                    unreachable!("buf is sized correctly in calling code")
                }
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        if let Some(offset) = self.padding_offset {
            // we've already seen padding
            if decode_metadata.decoded_len > 0 {
                // we read more after already finding padding; report error at first padding byte
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    DecodeError::InvalidByte(offset, PAD_BYTE),
                ));
            }
        }

        self.padding_offset = self.padding_offset.or(decode_metadata
            .padding_offset
            .map(|offset| self.input_consumed_len + offset));
        self.input_consumed_len += b64_len_to_decode;
        self.b64_offset += b64_len_to_decode;
        self.b64_len -= b64_len_to_decode;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decode_metadata.decoded_len)
    }

    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
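    ///
    /// # Examples
    ///
    /// An illustrative sketch (using a `Cursor` as the inner reader) of recovering the
    /// wrapped reader after decoding; the cursor's exact position afterwards is
    /// unspecified because of the internal buffering described above:
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let mut decoder = base64::read::DecoderReader::new(
    ///     Cursor::new(b"YXNkZg=="),
    ///     &general_purpose::STANDARD);
    ///
    /// let mut result = Vec::new();
    /// decoder.read_to_end(&mut result).unwrap();
    /// assert_eq!(b"asdf", &result[..]);
    ///
    /// // the `Cursor`-specific API is available again once we unwrap
    /// let cursor = decoder.into_inner();
    /// assert!(cursor.position() <= 8);
    /// ```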
    pub fn into_inner(self) -> R {
        self.inner
    }
}

impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written into `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of `read()` calls to
    /// the delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
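    ///
    /// # Examples
    ///
    /// A sketch of reading into a buffer smaller than a decoded chunk: leftover decoded
    /// bytes are buffered internally and returned by subsequent reads (the `Cursor` here
    /// is just a stand-in for any reader):
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
    /// let mut decoder = base64::read::DecoderReader::new(
    ///     &mut wrapped_reader,
    ///     &general_purpose::STANDARD);
    ///
    /// // a 2-byte buf can't hold the 3 bytes a complete b64 chunk decodes to
    /// let mut buf = [0_u8; 2];
    /// assert_eq!(2, decoder.read(&mut buf).unwrap());
    /// assert_eq!(b"as", &buf);
    /// // the third decoded byte was held back and is returned here
    /// assert_eq!(1, decoder.read(&mut buf).unwrap());
    /// assert_eq!(b'd', buf[0]);
    /// ```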
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into `decoded_chunk_buffer` when we can't immediately write at
        // least one byte into the provided buf, so the effective length should only be 3 momentarily
        // between when we decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush those rather than pulling in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Copy any bytes we have to the start of the buffer.
                self.b64_buffer
                    .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never read into an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            }

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
                // tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on the last chunk due to padding
                debug_assert!(decoded <= DECODED_CHUNK_SIZE);

                self.flush_decoded_buf(buf)
            } else {
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
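                // e.g. (hypothetical numbers) a 10-byte buf holds 3 complete decoded chunks
                // (9 bytes), so up to 3 * 4 = 12 b64 bytes can be decoded straight into it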

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % BASE64_CHUNK_SIZE
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}