encoder.rs source code [crates/base64/src/write/encoder.rs]

1	use crate::engine::Engine;
2	use std::{
3	cmp, fmt, io,
4	io::{ErrorKind, Result},
5	};
6
/// Size, in bytes, of the fixed buffer that encoded output is staged in before it is handed to
/// the delegate writer.
pub(crate) const BUF_SIZE: usize = 1024;
/// The most bytes whose encoding will fit in `BUF_SIZE`
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
12
13	/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
14	///
15	/// Because base64 has special handling for the end of the input data (padding, etc), there's a
16	/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
17	/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
18	/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
19	/// handle such errors, call `finish()` yourself.
20	///
21	/// # Examples
22	///
23	/// ```
24	/// use std::io::Write;
25	/// use base64::engine::general_purpose;
26	///
27	/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
28	/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD);
29	///
30	/// // handle errors as you normally would
31	/// enc.write_all(b"asdf").unwrap();
32	///
33	/// // could leave this out to be called by Drop, if you don't care
34	/// // about handling errors or getting the delegate writer back
35	/// let delegate = enc.finish().unwrap();
36	///
37	/// // base64 was written to the writer
38	/// assert_eq!(b"YXNkZg==", &delegate[..]);
39	///
40	/// ```
41	///
42	/// # Panics
43	///
44	/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
45	/// error is invalid and will panic.
46	///
47	/// # Errors
48	///
49	/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
50	/// returned as per the contract of `Write`.
51	///
52	/// # Performance
53	///
54	/// It has some minor performance loss compared to encoding slices (a couple percent).
55	/// It does not do any heap allocation.
56	///
57	/// # Limitations
58	///
59	/// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their
60	/// implications for a buffering implementation, these methods may not behave as expected. In
61	/// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`.
62	/// See the documentation of the `Write` trait implementation for further details.
63	pub struct EncoderWriter<'e, E: Engine, W: io::Write> {
64	engine: &'e E,
65	/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
66	/// called so that finish() can return the underlying writer. None implies that finish() has
67	/// been called successfully.
68	delegate: Option<W>,
69	/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
70	/// with the next `write()`, encode it, then proceed with the rest of the input normally.
71	extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
72	/// How much of `extra` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`.
73	extra_input_occupied_len: usize,
74	/// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the underlying writer
75	/// did not write last time.
76	output: [u8; BUF_SIZE],
77	/// How much of `output` is occupied with encoded data that couldn't be written last time
78	output_occupied_len: usize,
79	/// panic safety: don't write again in destructor if writer panicked while we were writing to it
80	panicked: bool,
81	}
82
83	impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> {
84	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85	write!(
86	f,
87	"extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
88	self.extra_input,
89	self.extra_input_occupied_len,
90	&self.output[`0`..`5`],
91	self.output_occupied_len
92	)
93	}
94	}
95
96	impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> {
97	/// Create a new encoder that will write to the provided delegate writer.
98	pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> {
99	EncoderWriter {
100	engine,
101	delegate: Some(delegate),
102	extra_input: [`0u8`; MIN_ENCODE_CHUNK_SIZE],
103	extra_input_occupied_len: `0`,
104	output: [`0u8`; BUF_SIZE],
105	output_occupied_len: `0`,
106	panicked: `false`,
107	}
108	}
109
110	/// Encode all remaining buffered data and write it, including any trailing incomplete input
111	/// triples and associated padding.
112	///
113	/// Once this succeeds, no further writes or calls to this method are allowed.
114	///
115	/// This may write to the delegate writer multiple times if the delegate writer does not accept
116	/// all input provided to its `write` each invocation.
117	///
118	/// If you don't care about error handling, it is not necessary to call this function, as the
119	/// equivalent finalization is done by the Drop impl.
120	///
121	/// Returns the writer that this was constructed around.
122	///
123	/// # Errors
124	///
125	/// The first error that is not of `ErrorKind::Interrupted` will be returned.
126	pub fn finish(&mut self) -> Result<W> {
127	// If we could consume self in finish(), we wouldn't have to worry about this case, but
128	// finish() is retryable in the face of I/O errors, so we can't consume here.
129	if self.delegate.is_none() {
130	panic!("Encoder has already had finish() called");
131	};
132
133	self.write_final_leftovers()?;
134
135	let writer = self.delegate.take().expect("Writer must be present");
136
137	Ok(writer)
138	}
139
140	/// Write any remaining buffered data to the delegate writer.
141	fn write_final_leftovers(&mut self) -> Result<()> {
142	if self.delegate.is_none() {
143	// finish() has already successfully called this, and we are now in drop() with a None
144	// writer, so just no-op
145	return Ok(());
146	}
147
148	self.write_all_encoded_output()?;
149
150	if self.extra_input_occupied_len > `0` {
151	let encoded_len = self
152	.engine
153	.encode_slice(
154	&self.extra_input[..self.extra_input_occupied_len],
155	&mut self.output[..],
156	)
157	.expect("buffer is large enough");
158
159	self.output_occupied_len = encoded_len;
160
161	self.write_all_encoded_output()?;
162
163	// write succeeded, do not write the encoding of extra again if finish() is retried
164	self.extra_input_occupied_len = `0`;
165	}
166
167	Ok(())
168	}
169
170	/// Write as much of the encoded output to the delegate writer as it will accept, and store the
171	/// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
172	///
173	/// # Errors
174	///
175	/// Errors from the delegate writer are returned. In the case of an error,
176	/// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
177	/// that no write took place.
178	fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
179	self.panicked = `true`;
180	let res = self
181	.delegate
182	.as_mut()
183	.expect("Writer must be present")
184	.write(&self.output[..current_output_len]);
185	self.panicked = `false`;
186
187	res.map(\|consumed\| {
188	debug_assert!(consumed <= current_output_len);
189
190	if consumed < current_output_len {
191	self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
192	// If we're blocking on I/O, the minor inefficiency of copying bytes to the
193	// start of the buffer is the least of our concerns...
194	// TODO Rotate moves more than we need to; copy_within now stable.
195	self.output.rotate_left(consumed);
196	} else {
197	self.output_occupied_len = `0`;
198	}
199	})
200	}
201
202	/// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
203	///
204	/// This is basically write_all for the remaining buffered data but without the undesirable
205	/// abort-on-`Ok(0)` behavior.
206	///
207	/// # Errors
208	///
209	/// Any error emitted by the delegate writer abort the write loop and is returned, unless it's
210	/// `Interrupted`, in which case the error is ignored and writes will continue.
211	fn write_all_encoded_output(&mut self) -> Result<()> {
212	while self.output_occupied_len > `0` {
213	let remaining_len = self.output_occupied_len;
214	match self.write_to_delegate(remaining_len) {
215	// try again on interrupts ala write_all
216	Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
217	// other errors return
218	Err(e) => return Err(e),
219	// success no-ops because remaining length is already updated
220	Ok(_) => {}
221	};
222	}
223
224	debug_assert_eq!(`0`, self.output_occupied_len);
225	Ok(())
226	}
227
228	/// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output
229	/// to.
230	///
231	/// Normally this method should not be needed, since `finish()` returns the inner writer if
232	/// it completes successfully. That will also ensure all data has been flushed, which the
233	/// `into_inner()` function does not* do.*
234	///
235	/// Calling this method after `finish()` has completed successfully will panic, since the
236	/// writer has already been returned.
237	///
238	/// This method may be useful if the writer implements additional APIs beyond the `Write`
239	/// trait. Note that the inner writer might be in an error state or have an incomplete
240	/// base64 string written to it.
241	pub fn into_inner(mut self) -> W {
242	self.delegate
243	.take()
244	.expect("Encoder has already had finish() called")
245	}
246	}
247
248	impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> {
249	/// Encode input and then write to the delegate writer.
250	///
251	/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
252	/// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
253	/// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
254	/// <https://github.com/rust-lang/rust/issues/56889> for more on that.
255	///
256	/// If the previous call to `write` provided more (encoded) data than the delegate writer could
257	/// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
258	/// data is present, subsequent calls to `write` will try to write the remaining buffered data
259	/// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
260	/// an error.
261	///
262	/// # Errors
263	///
264	/// Any errors emitted by the delegate writer are returned.
265	fn write(&mut self, input: &[u8]) -> Result<usize> {
266	if self.delegate.is_none() {
267	panic!("Cannot write more after calling finish()");
268	}
269
270	if input.is_empty() {
271	return Ok(`0`);
272	}
273
274	// The contract of `Write::write` places some constraints on this implementation:
275	// - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
276	// iterate over the input and encode multiple chunks.
277	// - Errors mean that "no bytes were written to this writer", so we need to reset the
278	// internal state to what it was before the error occurred
279
280	// before reading any input, write any leftover encoded output from last time
281	if self.output_occupied_len > `0` {
282	let current_len = self.output_occupied_len;
283	return self
284	.write_to_delegate(current_len)
285	// did not read any input
286	.map(\|_\| `0`);
287	}
288
289	debug_assert_eq!(`0`, self.output_occupied_len);
290
291	// how many bytes, if any, were read into `extra` to create a triple to encode
292	let mut extra_input_read_len = `0`;
293	let mut input = input;
294
295	let orig_extra_len = self.extra_input_occupied_len;
296
297	let mut encoded_size = `0`;
298	// always a multiple of MIN_ENCODE_CHUNK_SIZE
299	let mut max_input_len = MAX_INPUT_LEN;
300
301	// process leftover un-encoded input from last write
302	if self.extra_input_occupied_len > `0` {
303	debug_assert!(self.extra_input_occupied_len < `3`);
304	if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
305	// Fill up `extra`, encode that into `output`, and consume as much of the rest of
306	// `input` as possible.
307	// We could write just the encoding of `extra` by itself but then we'd have to
308	// return after writing only 4 bytes, which is inefficient if the underlying writer
309	// would make a syscall.
310	extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
311	debug_assert!(extra_input_read_len > `0`);
312	// overwrite only bytes that weren't already used. If we need to rollback extra_len
313	// (when the subsequent write errors), the old leading bytes will still be there.
314	self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
315	.copy_from_slice(&input[`0`..extra_input_read_len]);
316
317	let len = self.engine.internal_encode(
318	&self.extra_input[`0`..MIN_ENCODE_CHUNK_SIZE],
319	&mut self.output[..],
320	);
321	debug_assert_eq!(`4`, len);
322
323	input = &input[extra_input_read_len..];
324
325	// consider extra to be used up, since we encoded it
326	self.extra_input_occupied_len = `0`;
327	// don't clobber where we just encoded to
328	encoded_size = `4`;
329	// and don't read more than can be encoded
330	max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;
331
332	// fall through to normal encoding
333	} else {
334	// `extra` and `input` are non empty, but `\|extra\| + \|input\| < 3`, so there must be
335	// 1 byte in each.
336	debug_assert_eq!(`1`, input.len());
337	debug_assert_eq!(`1`, self.extra_input_occupied_len);
338
339	self.extra_input[self.extra_input_occupied_len] = input[`0`];
340	self.extra_input_occupied_len += `1`;
341	return Ok(`1`);
342	};
343	} else if input.len() < MIN_ENCODE_CHUNK_SIZE {
344	// `extra` is empty, and `input` fits inside it
345	self.extra_input[`0`..input.len()].copy_from_slice(input);
346	self.extra_input_occupied_len = input.len();
347	return Ok(input.len());
348	};
349
350	// either 0 or 1 complete chunks encoded from extra
351	debug_assert!(encoded_size == `0` \|\| encoded_size == `4`);
352	debug_assert!(
353	// didn't encode extra input
354	MAX_INPUT_LEN == max_input_len
355	// encoded one triple
356	\|\| MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
357	);
358
359	// encode complete triples only
360	let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
361	let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
362	debug_assert_eq!(`0`, max_input_len % MIN_ENCODE_CHUNK_SIZE);
363	debug_assert_eq!(`0`, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);
364
365	encoded_size += self.engine.internal_encode(
366	&input[..(input_chunks_to_encode_len)],
367	&mut self.output[encoded_size..],
368	);
369
370	// not updating `self.output_occupied_len` here because if the below write fails, it should
371	// "never take place" -- the buffer contents we encoded are ignored and perhaps retried
372	// later, if the consumer chooses.
373
374	self.write_to_delegate(encoded_size)
375	// no matter whether we wrote the full encoded buffer or not, we consumed the same
376	// input
377	.map(\|_\| extra_input_read_len + input_chunks_to_encode_len)
378	.map_err(\|e\| {
379	// in case we filled and encoded `extra`, reset extra_len
380	self.extra_input_occupied_len = orig_extra_len;
381
382	e
383	})
384	}
385
386	/// Because this is usually treated as OK to call multiple times, it will not* flush any*
387	/// incomplete chunks of input or write padding.
388	/// # Errors
389	///
390	/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
391	fn flush(&mut self) -> Result<()> {
392	self.write_all_encoded_output()?;
393	self.delegate
394	.as_mut()
395	.expect("Writer must be present")
396	.flush()
397	}
398	}
399
400	impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> {
401	fn drop(&mut self) {
402	if !self.panicked {
403	// like `BufWriter`, ignore errors during drop
404	let _ = self.write_final_leftovers();
405	}
406	}
407	}
408