mod.rs source code [crates/base64/src/engine/mod.rs]

1	//! Provides the [Engine] abstraction and out of the box implementations.
2	#[cfg(any(feature = "alloc", test))]
3	use crate::chunked_encoder;
4	use crate::{
5	encode::{encode_with_padding, EncodeSliceError},
6	encoded_len, DecodeError, DecodeSliceError,
7	};
8	#[cfg(any(feature = "alloc", test))]
9	use alloc::vec::Vec;
10
11	#[cfg(any(feature = "alloc", test))]
12	use alloc::{string::String, vec};
13
14	pub mod general_purpose;
15
16	#[cfg(test)]
17	mod naive;
18
19	#[cfg(test)]
20	mod tests;
21
22	pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23
24	/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25	///
26	/// Different implementations offer different characteristics. The library currently ships with
27	/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28	/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29	///
30	/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31	/// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32	/// issues, and to avoid repeating the cost of engine setup.
33	///
34	/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
35	// When adding an implementation of Engine, include them in the engine test suite:
36	// - add an implementation of [engine::tests::EngineWrapper]
37	// - add the implementation to the `all_engines` macro
38	// All tests run on all engines listed in the macro.
39	pub trait Engine: Send + Sync {
40	/// The config type used by this engine
41	type Config: Config;
42	/// The decode estimate used by this engine
43	type DecodeEstimate: DecodeEstimate;
44
45	/// This is not meant to be called directly; it is only for `Engine` implementors.
46	/// See the other `encode` functions on this trait.*
47	///
48	/// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
49	///
50	/// `output` will be long enough to hold the encoded data.
51	///
52	/// Returns the number of bytes written.
53	///
54	/// No padding should be written; that is handled separately.
55	///
56	/// Must not write any bytes into the output slice other than the encoded data.
57	#[doc(hidden)]
58	fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59
60	/// This is not meant to be called directly; it is only for `Engine` implementors.
61	///
62	/// As an optimization to prevent the decoded length from being calculated twice, it is
63	/// sometimes helpful to have a conservative estimate of the decoded size before doing the
64	/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
65	#[doc(hidden)]
66	fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67
68	/// This is not meant to be called directly; it is only for `Engine` implementors.
69	/// See the other `decode` functions on this trait.*
70	///
71	/// Decode `input` base64 bytes into the `output` buffer.
72	///
73	/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
74	/// calculating it again (expensive on short inputs).`
75	///
76	/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
77	/// function must also handle the final possibly partial chunk.
78	/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
79	/// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
80	/// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
81	///
82	/// Decoding must not write any bytes into the output slice other than the decoded data.
83	///
84	/// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
85	/// errors unless the engine is configured otherwise.
86	///
87	/// # Panics
88	///
89	/// Panics if `output` is too small.
90	#[doc(hidden)]
91	fn internal_decode(
92	&self,
93	input: &[u8],
94	output: &mut [u8],
95	decode_estimate: Self::DecodeEstimate,
96	) -> Result<DecodeMetadata, DecodeError>;
97
98	/// Returns the config for this engine.
99	fn config(&self) -> &Self::Config;
100
101	/// Encode arbitrary octets as base64 using the provided `Engine`.
102	/// Returns a `String`.
103	///
104	/// # Example
105	///
106	/// ```rust
107	/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
108	///
109	/// let b64 = general_purpose::STANDARD.encode(b"hello world~");
110	/// println!("{}", b64);
111	///
112	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
113	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
114	///
115	/// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
116	#[cfg(any(feature = "alloc", test))]
117	#[inline]
118	fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
119	fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
120	where
121	E: Engine + ?Sized,
122	{
123	let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
124	.expect("integer overflow when calculating buffer size");
125
126	let mut buf = vec![`0`; encoded_size];
127
128	encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
129
130	String::from_utf8(buf).expect("Invalid UTF8")
131	}
132
133	inner(self, input.as_ref())
134	}
135
136	/// Encode arbitrary octets as base64 into a supplied `String`.
137	/// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
138	///
139	/// # Example
140	///
141	/// ```rust
142	/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
143	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
144	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
145	///
146	/// fn main() {
147	/// let mut buf = String::new();
148	/// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
149	/// println!("{}", buf);
150	///
151	/// buf.clear();
152	/// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
153	/// println!("{}", buf);
154	/// }
155	/// ```
156	#[cfg(any(feature = "alloc", test))]
157	#[inline]
158	fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
159	fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
160	where
161	E: Engine + ?Sized,
162	{
163	let mut sink = chunked_encoder::StringSink::new(output_buf);
164
165	chunked_encoder::ChunkedEncoder::new(engine)
166	.encode(input_bytes, &mut sink)
167	.expect("Writing to a String shouldn't fail");
168	}
169
170	inner(self, input.as_ref(), output_buf)
171	}
172
173	/// Encode arbitrary octets as base64 into a supplied slice.
174	/// Writes into the supplied output buffer.
175	///
176	/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
177	/// or statically-allocated buffer).
178	///
179	/// # Example
180	///
181	#[cfg_attr(feature = "alloc", doc = "```")]
182	#[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
183	/// use base64::{Engine as _, engine::general_purpose};
184	/// let s = b"hello internet!";
185	/// let mut buf = Vec::new();
186	/// // make sure we'll have a slice big enough for base64 + padding
187	/// buf.resize(s.len() * `4` / `3` + `4`, `0`);
188	///
189	/// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
190	///
191	/// // shorten our vec down to just what was written
192	/// buf.truncate(bytes_written);
193	///
194	/// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
195	/// ```
196	#[inline]
197	fn encode_slice<T: AsRef<[u8]>>(
198	&self,
199	input: T,
200	output_buf: &mut [u8],
201	) -> Result<usize, EncodeSliceError> {
202	fn inner<E>(
203	engine: &E,
204	input_bytes: &[u8],
205	output_buf: &mut [u8],
206	) -> Result<usize, EncodeSliceError>
207	where
208	E: Engine + ?Sized,
209	{
210	let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
211	.expect("usize overflow when calculating buffer size");
212
213	if output_buf.len() < encoded_size {
214	return Err(EncodeSliceError::OutputSliceTooSmall);
215	}
216
217	let b64_output = &mut output_buf[`0`..encoded_size];
218
219	encode_with_padding(input_bytes, b64_output, engine, encoded_size);
220
221	Ok(encoded_size)
222	}
223
224	inner(self, input.as_ref(), output_buf)
225	}
226
227	/// Decode the input into a new `Vec`.
228	///
229	/// # Example
230	///
231	/// ```rust
232	/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
233	///
234	/// let bytes = general_purpose::STANDARD
235	/// .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
236	/// println!("{:?}", bytes);
237	///
238	/// // custom engine setup
239	/// let bytes_url = engine::GeneralPurpose::new(
240	/// &alphabet::URL_SAFE,
241	/// general_purpose::NO_PAD)
242	/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
243	/// println!("{:?}", bytes_url);
244	/// ```
245	#[cfg(any(feature = "alloc", test))]
246	#[inline]
247	fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
248	fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
249	where
250	E: Engine + ?Sized,
251	{
252	let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
253	let mut buffer = vec![`0`; estimate.decoded_len_estimate()];
254
255	let bytes_written = engine
256	.internal_decode(input_bytes, &mut buffer, estimate)?
257	.decoded_len;
258
259	buffer.truncate(bytes_written);
260
261	Ok(buffer)
262	}
263
264	inner(self, input.as_ref())
265	}
266
267	/// Decode the `input` into the supplied `buffer`.
268	///
269	/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
270	/// Returns a `Result` containing an empty tuple, aka `()`.
271	///
272	/// # Example
273	///
274	/// ```rust
275	/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
276	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
277	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
278	///
279	/// fn main() {
280	/// use base64::Engine;
281	/// let mut buffer = Vec::<u8>::new();
282	/// // with the default engine
283	/// general_purpose::STANDARD
284	/// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
285	/// println!("{:?}", buffer);
286	///
287	/// buffer.clear();
288	///
289	/// // with a custom engine
290	/// CUSTOM_ENGINE.decode_vec(
291	/// "aGVsbG8gaW50ZXJuZXR-Cg==",
292	/// &mut buffer,
293	/// ).unwrap();
294	/// println!("{:?}", buffer);
295	/// }
296	/// ```
297	#[cfg(any(feature = "alloc", test))]
298	#[inline]
299	fn decode_vec<T: AsRef<[u8]>>(
300	&self,
301	input: T,
302	buffer: &mut Vec<u8>,
303	) -> Result<(), DecodeError> {
304	fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
305	where
306	E: Engine + ?Sized,
307	{
308	let starting_output_len = buffer.len();
309	let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
310
311	let total_len_estimate = estimate
312	.decoded_len_estimate()
313	.checked_add(starting_output_len)
314	.expect("Overflow when calculating output buffer length");
315
316	buffer.resize(total_len_estimate, `0`);
317
318	let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
319
320	let bytes_written = engine
321	.internal_decode(input_bytes, buffer_slice, estimate)?
322	.decoded_len;
323
324	buffer.truncate(starting_output_len + bytes_written);
325
326	Ok(())
327	}
328
329	inner(self, input.as_ref(), buffer)
330	}
331
332	/// Decode the input into the provided output slice.
333	///
334	/// Returns the number of bytes written to the slice, or an error if `output` is smaller than
335	/// the estimated decoded length.
336	///
337	/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
338	///
339	/// See [crate::decoded_len_estimate] for calculating buffer sizes.
340	///
341	/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
342	/// if the output buffer is too small.
343	#[inline]
344	fn decode_slice<T: AsRef<[u8]>>(
345	&self,
346	input: T,
347	output: &mut [u8],
348	) -> Result<usize, DecodeSliceError> {
349	fn inner<E>(
350	engine: &E,
351	input_bytes: &[u8],
352	output: &mut [u8],
353	) -> Result<usize, DecodeSliceError>
354	where
355	E: Engine + ?Sized,
356	{
357	let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
358
359	if output.len() < estimate.decoded_len_estimate() {
360	return Err(DecodeSliceError::OutputSliceTooSmall);
361	}
362
363	engine
364	.internal_decode(input_bytes, output, estimate)
365	.map_err(\|e\| e.into())
366	.map(\|dm\| dm.decoded_len)
367	}
368
369	inner(self, input.as_ref(), output)
370	}
371
372	/// Decode the input into the provided output slice.
373	///
374	/// Returns the number of bytes written to the slice.
375	///
376	/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
377	///
378	/// See [crate::decoded_len_estimate] for calculating buffer sizes.
379	///
380	/// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
381	/// buffer is too small.
382	///
383	/// # Panics
384	///
385	/// Panics if the provided output buffer is too small for the decoded data.
386	#[inline]
387	fn decode_slice_unchecked<T: AsRef<[u8]>>(
388	&self,
389	input: T,
390	output: &mut [u8],
391	) -> Result<usize, DecodeError> {
392	fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
393	where
394	E: Engine + ?Sized,
395	{
396	engine
397	.internal_decode(
398	input_bytes,
399	output,
400	engine.internal_decoded_len_estimate(input_bytes.len()),
401	)
402	.map(\|dm\| dm.decoded_len)
403	}
404
405	inner(self, input.as_ref(), output)
406	}
407	}
408
/// The minimal level of configuration that engines must support.
pub trait Config {
    /// Returns `true` if padding should be added after the encoded output.
    ///
    /// Padding is added outside the engine's encode() since the engine may be used
    /// to encode only a chunk of the overall output, so it can't always know when
    /// the output is "done" and would therefore need padding (if configured).
    // It could be provided as a separate parameter when encoding, but that feels like
    // leaking an implementation detail to the user, and it's hopefully more convenient
    // to have to only pass one thing (the engine) to any part of the API.
    fn encode_padding(&self) -> bool;
}
421
/// The decode estimate used by an engine implementation. Users do not need to interact with this;
/// it is only for engine implementors.
///
/// Implementors may store relevant data here when constructing this to avoid having to calculate
/// them again during actual decoding.
pub trait DecodeEstimate {
    /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
    /// for pre-allocating buffers, etc.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
    fn decoded_len_estimate(&self) -> usize;
}
435
/// Controls how pad bytes are handled when decoding.
///
/// Each [Engine] must support at least the behavior indicated by
/// [DecodePaddingMode::RequireCanonical], and may support other modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed.
    Indifferent,
    /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
    RequireNone,
}
449
/// Metadata about the result of a decode operation
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
    /// Number of decoded bytes output
    pub(crate) decoded_len: usize,
    /// Offset of the first padding byte in the input, if any
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata for a finished decode.
    ///
    /// `decoded_bytes` is stored as the number of decoded bytes output, and `padding_index` as
    /// the offset of the first padding byte in the input (`None` if there was no padding).
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        Self {
            decoded_len: decoded_bytes,
            padding_offset: padding_index,
        }
    }
}
467