mod.rs source code [crates/base64-0.21.2/src/engine/mod.rs]

1	//! Provides the [Engine] abstraction and out of the box implementations.
2	#[cfg(any(feature = "alloc", feature = "std", test))]
3	use crate::chunked_encoder;
4	use crate::{
5	encode::{encode_with_padding, EncodeSliceError},
6	encoded_len, DecodeError, DecodeSliceError,
7	};
8	#[cfg(any(feature = "alloc", feature = "std", test))]
9	use alloc::vec::Vec;
10
11	#[cfg(any(feature = "alloc", feature = "std", test))]
12	use alloc::{string::String, vec};
13
14	pub mod general_purpose;
15
16	#[cfg(test)]
17	mod naive;
18
19	#[cfg(test)]
20	mod tests;
21
22	pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23
24	/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25	///
26	/// Different implementations offer different characteristics. The library currently ships with
27	/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28	/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29	///
30	/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31	/// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32	/// issues, and to avoid repeating the cost of engine setup.
33	///
34	/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
35	// When adding an implementation of Engine, include them in the engine test suite:
36	// - add an implementation of [engine::tests::EngineWrapper]
37	// - add the implementation to the `all_engines` macro
38	// All tests run on all engines listed in the macro.
39	pub trait Engine: Send + Sync {
40	/// The config type used by this engine
41	type Config: Config;
42	/// The decode estimate used by this engine
43	type DecodeEstimate: DecodeEstimate;
44
45	/// This is not meant to be called directly; it is only for `Engine` implementors.
46	/// See the other `encode` functions on this trait.*
47	///
48	/// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
49	///
50	/// `output` will be long enough to hold the encoded data.
51	///
52	/// Returns the number of bytes written.
53	///
54	/// No padding should be written; that is handled separately.
55	///
56	/// Must not write any bytes into the output slice other than the encoded data.
57	#[doc(hidden)]
58	fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59
60	/// This is not meant to be called directly; it is only for `Engine` implementors.
61	///
62	/// As an optimization to prevent the decoded length from being calculated twice, it is
63	/// sometimes helpful to have a conservative estimate of the decoded size before doing the
64	/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
65	#[doc(hidden)]
66	fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67
68	/// This is not meant to be called directly; it is only for `Engine` implementors.
69	/// See the other `decode` functions on this trait.*
70	///
71	/// Decode `input` base64 bytes into the `output` buffer.
72	///
73	/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
74	/// calculating it again (expensive on short inputs).`
75	///
76	/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
77	/// function must also handle the final possibly partial chunk.
78	/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
79	/// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
80	/// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
81	///
82	/// Decoding must not write any bytes into the output slice other than the decoded data.
83	///
84	/// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
85	/// errors unless the engine is configured otherwise.
86	///
87	/// # Panics
88	///
89	/// Panics if `output` is too small.
90	#[doc(hidden)]
91	fn internal_decode(
92	&self,
93	input: &[u8],
94	output: &mut [u8],
95	decode_estimate: Self::DecodeEstimate,
96	) -> Result<DecodeMetadata, DecodeError>;
97
98	/// Returns the config for this engine.
99	fn config(&self) -> &Self::Config;
100
101	/// Encode arbitrary octets as base64 using the provided `Engine`.
102	/// Returns a `String`.
103	///
104	/// # Example
105	///
106	/// ```rust
107	/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
108	///
109	/// let b64 = general_purpose::STANDARD.encode(b"hello world~");
110	/// println!("{}", b64);
111	///
112	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
113	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
114	///
115	/// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
116	#[cfg(any(feature = "alloc", feature = "std", test))]
117	fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
118	let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding())
119	.expect("integer overflow when calculating buffer size");
120	let mut buf = vec![`0`; encoded_size];
121
122	encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size);
123
124	String::from_utf8(buf).expect("Invalid UTF8")
125	}
126
127	/// Encode arbitrary octets as base64 into a supplied `String`.
128	/// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
129	///
130	/// # Example
131	///
132	/// ```rust
133	/// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
134	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
135	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
136	///
137	/// fn main() {
138	/// let mut buf = String::new();
139	/// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
140	/// println!("{}", buf);
141	///
142	/// buf.clear();
143	/// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
144	/// println!("{}", buf);
145	/// }
146	/// ```
147	#[cfg(any(feature = "alloc", feature = "std", test))]
148	fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
149	let input_bytes = input.as_ref();
150
151	{
152	let mut sink = chunked_encoder::StringSink::new(output_buf);
153
154	chunked_encoder::ChunkedEncoder::new(self)
155	.encode(input_bytes, &mut sink)
156	.expect("Writing to a String shouldn't fail");
157	}
158	}
159
160	/// Encode arbitrary octets as base64 into a supplied slice.
161	/// Writes into the supplied output buffer.
162	///
163	/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
164	/// or statically-allocated buffer).
165	///
166	/// # Example
167	///
168	/// ```rust
169	/// use base64::{Engine as _, engine::general_purpose};
170	/// let s = b"hello internet!";
171	/// let mut buf = Vec::new();
172	/// // make sure we'll have a slice big enough for base64 + padding
173	/// buf.resize(s.len() * `4` / `3` + `4`, `0`);
174	///
175	/// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
176	///
177	/// // shorten our vec down to just what was written
178	/// buf.truncate(bytes_written);
179	///
180	/// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
181	/// ```
182	fn encode_slice<T: AsRef<[u8]>>(
183	&self,
184	input: T,
185	output_buf: &mut [u8],
186	) -> Result<usize, EncodeSliceError> {
187	let input_bytes = input.as_ref();
188
189	let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding())
190	.expect("usize overflow when calculating buffer size");
191
192	if output_buf.len() < encoded_size {
193	return Err(EncodeSliceError::OutputSliceTooSmall);
194	}
195
196	let b64_output = &mut output_buf[`0`..encoded_size];
197
198	encode_with_padding(input_bytes, b64_output, self, encoded_size);
199
200	Ok(encoded_size)
201	}
202
203	/// Decode the input into a new `Vec`.
204	///
205	/// # Example
206	///
207	/// ```rust
208	/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
209	///
210	/// let bytes = general_purpose::STANDARD
211	/// .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
212	/// println!("{:?}", bytes);
213	///
214	/// // custom engine setup
215	/// let bytes_url = engine::GeneralPurpose::new(
216	/// &alphabet::URL_SAFE,
217	/// general_purpose::NO_PAD)
218	/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
219	/// println!("{:?}", bytes_url);
220	/// ```
221	#[cfg(any(feature = "alloc", feature = "std", test))]
222	fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
223	let input_bytes = input.as_ref();
224
225	let estimate = self.internal_decoded_len_estimate(input_bytes.len());
226	let mut buffer = vec![`0`; estimate.decoded_len_estimate()];
227
228	let bytes_written = self
229	.internal_decode(input_bytes, &mut buffer, estimate)?
230	.decoded_len;
231	buffer.truncate(bytes_written);
232
233	Ok(buffer)
234	}
235
236	/// Decode the `input` into the supplied `buffer`.
237	///
238	/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
239	/// Returns a `Result` containing an empty tuple, aka `()`.
240	///
241	/// # Example
242	///
243	/// ```rust
244	/// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
245	/// const CUSTOM_ENGINE: engine::GeneralPurpose =
246	/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
247	///
248	/// fn main() {
249	/// use base64::Engine;
250	/// let mut buffer = Vec::<u8>::new();
251	/// // with the default engine
252	/// general_purpose::STANDARD
253	/// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
254	/// println!("{:?}", buffer);
255	///
256	/// buffer.clear();
257	///
258	/// // with a custom engine
259	/// CUSTOM_ENGINE.decode_vec(
260	/// "aGVsbG8gaW50ZXJuZXR-Cg==",
261	/// &mut buffer,
262	/// ).unwrap();
263	/// println!("{:?}", buffer);
264	/// }
265	/// ```
266	#[cfg(any(feature = "alloc", feature = "std", test))]
267	fn decode_vec<T: AsRef<[u8]>>(
268	&self,
269	input: T,
270	buffer: &mut Vec<u8>,
271	) -> Result<(), DecodeError> {
272	let input_bytes = input.as_ref();
273
274	let starting_output_len = buffer.len();
275
276	let estimate = self.internal_decoded_len_estimate(input_bytes.len());
277	let total_len_estimate = estimate
278	.decoded_len_estimate()
279	.checked_add(starting_output_len)
280	.expect("Overflow when calculating output buffer length");
281	buffer.resize(total_len_estimate, `0`);
282
283	let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
284	let bytes_written = self
285	.internal_decode(input_bytes, buffer_slice, estimate)?
286	.decoded_len;
287
288	buffer.truncate(starting_output_len + bytes_written);
289
290	Ok(())
291	}
292
293	/// Decode the input into the provided output slice.
294	///
295	/// Returns the number of bytes written to the slice, or an error if `output` is smaller than
296	/// the estimated decoded length.
297	///
298	/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
299	///
300	/// See [crate::decoded_len_estimate] for calculating buffer sizes.
301	///
302	/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
303	/// if the output buffer is too small.
304	fn decode_slice<T: AsRef<[u8]>>(
305	&self,
306	input: T,
307	output: &mut [u8],
308	) -> Result<usize, DecodeSliceError> {
309	let input_bytes = input.as_ref();
310
311	let estimate = self.internal_decoded_len_estimate(input_bytes.len());
312	if output.len() < estimate.decoded_len_estimate() {
313	return Err(DecodeSliceError::OutputSliceTooSmall);
314	}
315
316	self.internal_decode(input_bytes, output, estimate)
317	.map_err(\|e\| e.into())
318	.map(\|dm\| dm.decoded_len)
319	}
320
321	/// Decode the input into the provided output slice.
322	///
323	/// Returns the number of bytes written to the slice.
324	///
325	/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
326	///
327	/// See [crate::decoded_len_estimate] for calculating buffer sizes.
328	///
329	/// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
330	/// buffer is too small.
331	///
332	/// # Panics
333	///
334	/// Panics if the provided output buffer is too small for the decoded data.
335	fn decode_slice_unchecked<T: AsRef<[u8]>>(
336	&self,
337	input: T,
338	output: &mut [u8],
339	) -> Result<usize, DecodeError> {
340	let input_bytes = input.as_ref();
341
342	self.internal_decode(
343	input_bytes,
344	output,
345	self.internal_decoded_len_estimate(input_bytes.len()),
346	)
347	.map(\|dm\| dm.decoded_len)
348	}
349	}
350
/// The smallest set of configuration knobs every engine has to expose.
pub trait Config {
    /// Reports whether padding should be appended after the encoded output (`true` means
    /// padding is added).
    ///
    /// Padding is applied outside of the engine's own encode step: an engine may be asked to
    /// encode only one chunk of a larger output, so it cannot always tell when the output is
    /// "done" and would need padding (if configured).
    // Padding could instead be passed as a separate parameter at encode time, but that would
    // leak an implementation detail to the user; handing over a single thing (the engine) to
    // every part of the API is hopefully more convenient.
    fn encode_padding(&self) -> bool;
}
363
/// Decoded-size estimate produced by an engine implementation. This is only of interest to
/// engine implementors; users do not need to interact with it.
///
/// An implementation may stash whatever it computed while building the estimate so that the
/// same values need not be recalculated during the actual decode.
pub trait DecodeEstimate {
    /// Returns a conservative estimate of the decoded length (erring on the side of too big),
    /// suitable for pre-allocating buffers and similar uses.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// In other words, the final quad of tokens may be assumed to be complete with no padding.
    fn decoded_len_estimate(&self) -> usize;
}
377
/// Selects how pad bytes are treated during decoding.
///
/// Every [Engine] has to support at least the behavior described by
/// [DecodePaddingMode::RequireCanonical]; support for the other modes is optional.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Accepts canonical padding as well as any smaller number of padding bytes.
    Indifferent,
    /// Requires canonical padding (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Requires that no padding is present -- for when you want predictable padding without
    /// any wasted bytes.
    RequireNone,
}
391
/// Describes the outcome of a single decode operation.
#[derive(Debug, PartialEq, Eq)]
pub struct DecodeMetadata {
    /// How many decoded bytes were produced
    pub(crate) decoded_len: usize,
    /// Where the first padding byte sits in the input, when there is one
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the decoded byte count and the optional offset of the first
    /// padding byte.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        Self {
            decoded_len,
            padding_offset,
        }
    }
}
409