mod.rs source code [crates/base64/src/engine/mod.rs]

1	//! Provides the [Engine] abstraction and out of the box implementations.
2	#[cfg(any(feature = "alloc", test))]
3	use crate::chunked_encoder;
4	use crate::{
5	encode::{encode_with_padding, EncodeSliceError},
6	encoded_len, DecodeError, DecodeSliceError,
7	};
8	#[cfg(any(feature = "alloc", test))]
9	use alloc::vec::Vec;
10
11	#[cfg(any(feature = "alloc", test))]
12	use alloc::{string::String, vec};
13
14	pub mod general_purpose;
15
16	#[cfg(test)]
17	mod naive;
18
19	#[cfg(test)]
20	mod tests;
21
22	pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23
24	/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25	///
26	/// Different implementations offer different characteristics. The library currently ships with
27	/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28	/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29	///
30	/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31	/// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32	/// issues, and to avoid repeating the cost of engine setup.
33	///
34	/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
35	// When adding an implementation of Engine, include them in the engine test suite:
36	// - add an implementation of [engine::tests::EngineWrapper]
37	// - add the implementation to the `all_engines` macro
38	// All tests run on all engines listed in the macro.
39	pub trait Engine: Send + Sync {
40	/// The config type used by this engine
41	type Config: Config;
42	/// The decode estimate used by this engine
43	type DecodeEstimate: DecodeEstimate;
44
45	/// This is not meant to be called directly; it is only for `Engine` implementors.
46	/// See the other `encode` functions on this trait.*
47	///
48	/// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
49	///
50	/// `output` will be long enough to hold the encoded data.
51	///
52	/// Returns the number of bytes written.
53	///
54	/// No padding should be written; that is handled separately.
55	///
56	/// Must not write any bytes into the output slice other than the encoded data.
57	#[doc(hidden)]
58	fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59
60	/// This is not meant to be called directly; it is only for `Engine` implementors.
61	///
62	/// As an optimization to prevent the decoded length from being calculated twice, it is
63	/// sometimes helpful to have a conservative estimate of the decoded size before doing the
64	/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
65	#[doc(hidden)]
66	fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67
68	/// This is not meant to be called directly; it is only for `Engine` implementors.
69	/// See the other `decode` functions on this trait.*
70	///
71	/// Decode `input` base64 bytes into the `output` buffer.
72	///
73	/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
74	/// calculating it again (expensive on short inputs).`
75	///
76	/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
77	/// function must also handle the final possibly partial chunk.
78	/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
79	/// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
80	/// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
81	///
82	/// Decoding must not write any bytes into the output slice other than the decoded data.
83	///
84	/// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
85	/// errors unless the engine is configured otherwise.
86	#[doc(hidden)]
87	fn internal_decode(
88	&self,
89	input: &[u8],
90	output: &mut [u8],
91	decode_estimate: Self::DecodeEstimate,
92	) -> Result<DecodeMetadata, DecodeSliceError>;
93
94	/// Returns the config for this engine.
95	fn config(&self) -> &Self::Config;
96
97	/// Encode arbitrary octets as base64 using the provided `Engine`.
98	/// Returns a `String`.
99	///
100	/// # Example
101	///
102	/// ```rust
103	/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
104	///
105	/// let b64 = general_purpose::STANDARD.encode(b"hello world~");
106	/// println!("{}", b64);
107	///
108	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
109	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
110	///
111	/// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
112	/// ```
113	#[cfg(any(feature = "alloc", test))]
114	#[inline]
115	fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
116	fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
117	where
118	E: Engine + ?Sized,
119	{
120	let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
121	.expect("integer overflow when calculating buffer size");
122
123	let mut buf = vec![`0`; encoded_size];
124
125	encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
126
127	String::from_utf8(buf).expect("Invalid UTF8")
128	}
129
130	inner(self, input.as_ref())
131	}
132
133	/// Encode arbitrary octets as base64 into a supplied `String`.
134	/// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
135	///
136	/// # Example
137	///
138	/// ```rust
139	/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
140	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
141	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
142	///
143	/// fn main() {
144	/// let mut buf = String::new();
145	/// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
146	/// println!("{}", buf);
147	///
148	/// buf.clear();
149	/// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
150	/// println!("{}", buf);
151	/// }
152	/// ```
153	#[cfg(any(feature = "alloc", test))]
154	#[inline]
155	fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
156	fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
157	where
158	E: Engine + ?Sized,
159	{
160	let mut sink = chunked_encoder::StringSink::new(output_buf);
161
162	chunked_encoder::ChunkedEncoder::new(engine)
163	.encode(input_bytes, &mut sink)
164	.expect("Writing to a String shouldn't fail");
165	}
166
167	inner(self, input.as_ref(), output_buf)
168	}
169
170	/// Encode arbitrary octets as base64 into a supplied slice.
171	/// Writes into the supplied output buffer.
172	///
173	/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
174	/// or statically-allocated buffer).
175	///
176	/// # Example
177	///
178	#[cfg_attr(feature = "alloc", doc = "```")]
179	#[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
180	/// use base64::{Engine as _, engine::general_purpose};
181	/// let s = b"hello internet!";
182	/// let mut buf = Vec::new();
183	/// // make sure we'll have a slice big enough for base64 + padding
184	/// buf.resize(s.len() * `4` / `3` + `4`, `0`);
185	///
186	/// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
187	///
188	/// // shorten our vec down to just what was written
189	/// buf.truncate(bytes_written);
190	///
191	/// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
192	/// ```
193	#[inline]
194	fn encode_slice<T: AsRef<[u8]>>(
195	&self,
196	input: T,
197	output_buf: &mut [u8],
198	) -> Result<usize, EncodeSliceError> {
199	fn inner<E>(
200	engine: &E,
201	input_bytes: &[u8],
202	output_buf: &mut [u8],
203	) -> Result<usize, EncodeSliceError>
204	where
205	E: Engine + ?Sized,
206	{
207	let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
208	.expect("usize overflow when calculating buffer size");
209
210	if output_buf.len() < encoded_size {
211	return Err(EncodeSliceError::OutputSliceTooSmall);
212	}
213
214	let b64_output = &mut output_buf[`0`..encoded_size];
215
216	encode_with_padding(input_bytes, b64_output, engine, encoded_size);
217
218	Ok(encoded_size)
219	}
220
221	inner(self, input.as_ref(), output_buf)
222	}
223
224	/// Decode the input into a new `Vec`.
225	///
226	/// # Example
227	///
228	/// ```rust
229	/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
230	///
231	/// let bytes = general_purpose::STANDARD
232	/// .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
233	/// println!("{:?}", bytes);
234	///
235	/// // custom engine setup
236	/// let bytes_url = engine::GeneralPurpose::new(
237	/// &alphabet::URL_SAFE,
238	/// general_purpose::NO_PAD)
239	/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
240	/// println!("{:?}", bytes_url);
241	/// ```
242	#[cfg(any(feature = "alloc", test))]
243	#[inline]
244	fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
245	fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
246	where
247	E: Engine + ?Sized,
248	{
249	let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
250	let mut buffer = vec![`0`; estimate.decoded_len_estimate()];
251
252	let bytes_written = engine
253	.internal_decode(input_bytes, &mut buffer, estimate)
254	.map_err(\|e\| match e {
255	DecodeSliceError::DecodeError(e) => e,
256	DecodeSliceError::OutputSliceTooSmall => {
257	unreachable!("Vec is sized conservatively")
258	}
259	})?
260	.decoded_len;
261
262	buffer.truncate(bytes_written);
263
264	Ok(buffer)
265	}
266
267	inner(self, input.as_ref())
268	}
269
270	/// Decode the `input` into the supplied `buffer`.
271	///
272	/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
273	/// Returns a `Result` containing an empty tuple, aka `()`.
274	///
275	/// # Example
276	///
277	/// ```rust
278	/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
279	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
280	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
281	///
282	/// fn main() {
283	/// use base64::Engine;
284	/// let mut buffer = Vec::<u8>::new();
285	/// // with the default engine
286	/// general_purpose::STANDARD
287	/// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
288	/// println!("{:?}", buffer);
289	///
290	/// buffer.clear();
291	///
292	/// // with a custom engine
293	/// CUSTOM_ENGINE.decode_vec(
294	/// "aGVsbG8gaW50ZXJuZXR-Cg==",
295	/// &mut buffer,
296	/// ).unwrap();
297	/// println!("{:?}", buffer);
298	/// }
299	/// ```
300	#[cfg(any(feature = "alloc", test))]
301	#[inline]
302	fn decode_vec<T: AsRef<[u8]>>(
303	&self,
304	input: T,
305	buffer: &mut Vec<u8>,
306	) -> Result<(), DecodeError> {
307	fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
308	where
309	E: Engine + ?Sized,
310	{
311	let starting_output_len = buffer.len();
312	let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
313
314	let total_len_estimate = estimate
315	.decoded_len_estimate()
316	.checked_add(starting_output_len)
317	.expect("Overflow when calculating output buffer length");
318
319	buffer.resize(total_len_estimate, `0`);
320
321	let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
322
323	let bytes_written = engine
324	.internal_decode(input_bytes, buffer_slice, estimate)
325	.map_err(\|e\| match e {
326	DecodeSliceError::DecodeError(e) => e,
327	DecodeSliceError::OutputSliceTooSmall => {
328	unreachable!("Vec is sized conservatively")
329	}
330	})?
331	.decoded_len;
332
333	buffer.truncate(starting_output_len + bytes_written);
334
335	Ok(())
336	}
337
338	inner(self, input.as_ref(), buffer)
339	}
340
341	/// Decode the input into the provided output slice.
342	///
343	/// Returns the number of bytes written to the slice, or an error if `output` is smaller than
344	/// the estimated decoded length.
345	///
346	/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
347	///
348	/// See [crate::decoded_len_estimate] for calculating buffer sizes.
349	///
350	/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
351	/// if the output buffer is too small.
352	#[inline]
353	fn decode_slice<T: AsRef<[u8]>>(
354	&self,
355	input: T,
356	output: &mut [u8],
357	) -> Result<usize, DecodeSliceError> {
358	fn inner<E>(
359	engine: &E,
360	input_bytes: &[u8],
361	output: &mut [u8],
362	) -> Result<usize, DecodeSliceError>
363	where
364	E: Engine + ?Sized,
365	{
366	engine
367	.internal_decode(
368	input_bytes,
369	output,
370	engine.internal_decoded_len_estimate(input_bytes.len()),
371	)
372	.map(\|dm\| dm.decoded_len)
373	}
374
375	inner(self, input.as_ref(), output)
376	}
377
378	/// Decode the input into the provided output slice.
379	///
380	/// Returns the number of bytes written to the slice.
381	///
382	/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
383	///
384	/// See [crate::decoded_len_estimate] for calculating buffer sizes.
385	///
386	/// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
387	/// buffer is too small.
388	///
389	/// # Panics
390	///
391	/// Panics if the provided output buffer is too small for the decoded data.
392	#[inline]
393	fn decode_slice_unchecked<T: AsRef<[u8]>>(
394	&self,
395	input: T,
396	output: &mut [u8],
397	) -> Result<usize, DecodeError> {
398	fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
399	where
400	E: Engine + ?Sized,
401	{
402	engine
403	.internal_decode(
404	input_bytes,
405	output,
406	engine.internal_decoded_len_estimate(input_bytes.len()),
407	)
408	.map(\|dm\| dm.decoded_len)
409	.map_err(\|e\| match e {
410	DecodeSliceError::DecodeError(e) => e,
411	DecodeSliceError::OutputSliceTooSmall => {
412	panic!("Output slice is too small")
413	}
414	})
415	}
416
417	inner(self, input.as_ref(), output)
418	}
419	}
420
/// The minimal level of configuration that engines must support.
///
/// An engine exposes its configuration through [Engine::config()]; the provided encoding
/// methods on [Engine] consult it when computing the encoded length.
pub trait Config {
    /// Returns `true` if padding should be added after the encoded output.
    ///
    /// Padding is added outside the engine's encode() since the engine may be used
    /// to encode only a chunk of the overall output, so it can't always know when
    /// the output is "done" and would therefore need padding (if configured).
    // It could be provided as a separate parameter when encoding, but that feels like
    // leaking an implementation detail to the user, and it's hopefully more convenient
    // to have to only pass one thing (the engine) to any part of the API.
    fn encode_padding(&self) -> bool;
}
433
/// The decode estimate used by an engine implementation. Users do not need to interact with this;
/// it is only for engine implementors.
///
/// Implementors may store relevant data here when constructing this to avoid having to calculate
/// them again during actual decoding. The value is produced by
/// [Engine::internal_decoded_len_estimate()] and later handed back to
/// [Engine::internal_decode()].
pub trait DecodeEstimate {
    /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
    /// for pre-allocating buffers, etc.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
    fn decoded_len_estimate(&self) -> usize;
}
447
/// Controls how pad bytes are handled when decoding.
///
/// Each [Engine] must support at least the behavior indicated by
/// [DecodePaddingMode::RequireCanonical], and may support other modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed.
    Indifferent,
    /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
    RequireNone,
}
461
/// Summary of a completed decode operation, as returned by an engine's decode routine.
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
    /// How many decoded bytes were written to the output.
    pub(crate) decoded_len: usize,
    /// Position in the input of the first padding byte, or `None` when there was no padding.
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the number of decoded bytes and the optional input offset of
    /// the first padding byte.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        DecodeMetadata {
            decoded_len,
            padding_offset,
        }
    }
}
479