//! Provides the [Engine] abstraction and out-of-the-box implementations.
2#[cfg(any(feature = "alloc", feature = "std", test))]
3use crate::chunked_encoder;
4use crate::{
5 encode::{encode_with_padding, EncodeSliceError},
6 encoded_len, DecodeError, DecodeSliceError,
7};
8#[cfg(any(feature = "alloc", feature = "std", test))]
9use alloc::vec::Vec;
10
11#[cfg(any(feature = "alloc", feature = "std", test))]
12use alloc::{string::String, vec};
13
14pub mod general_purpose;
15
16#[cfg(test)]
17mod naive;
18
19#[cfg(test)]
20mod tests;
21
22pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23
24/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25///
26/// Different implementations offer different characteristics. The library currently ships with
27/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29///
30/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31/// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32/// issues, and to avoid repeating the cost of engine setup.
33///
34/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
35// When adding an implementation of Engine, include them in the engine test suite:
36// - add an implementation of [engine::tests::EngineWrapper]
37// - add the implementation to the `all_engines` macro
38// All tests run on all engines listed in the macro.
39pub trait Engine: Send + Sync {
40 /// The config type used by this engine
41 type Config: Config;
42 /// The decode estimate used by this engine
43 type DecodeEstimate: DecodeEstimate;
44
45 /// This is not meant to be called directly; it is only for `Engine` implementors.
46 /// See the other `encode*` functions on this trait.
47 ///
48 /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`.
49 ///
50 /// `output` will be long enough to hold the encoded data.
51 ///
52 /// Returns the number of bytes written.
53 ///
54 /// No padding should be written; that is handled separately.
55 ///
56 /// Must not write any bytes into the output slice other than the encoded data.
57 #[doc(hidden)]
58 fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59
60 /// This is not meant to be called directly; it is only for `Engine` implementors.
61 ///
62 /// As an optimization to prevent the decoded length from being calculated twice, it is
63 /// sometimes helpful to have a conservative estimate of the decoded size before doing the
64 /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
65 #[doc(hidden)]
66 fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67
68 /// This is not meant to be called directly; it is only for `Engine` implementors.
69 /// See the other `decode*` functions on this trait.
70 ///
71 /// Decode `input` base64 bytes into the `output` buffer.
72 ///
73 /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
74 /// calculating it again (expensive on short inputs).`
75 ///
76 /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
77 /// function must also handle the final possibly partial chunk.
78 /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
79 /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
80 /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
81 ///
82 /// Decoding must not write any bytes into the output slice other than the decoded data.
83 ///
84 /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
85 /// errors unless the engine is configured otherwise.
86 ///
87 /// # Panics
88 ///
89 /// Panics if `output` is too small.
90 #[doc(hidden)]
91 fn internal_decode(
92 &self,
93 input: &[u8],
94 output: &mut [u8],
95 decode_estimate: Self::DecodeEstimate,
96 ) -> Result<DecodeMetadata, DecodeError>;
97
98 /// Returns the config for this engine.
99 fn config(&self) -> &Self::Config;
100
101 /// Encode arbitrary octets as base64 using the provided `Engine`.
102 /// Returns a `String`.
103 ///
104 /// # Example
105 ///
106 /// ```rust
107 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
108 ///
109 /// let b64 = general_purpose::STANDARD.encode(b"hello world~");
110 /// println!("{}", b64);
111 ///
112 /// const CUSTOM_ENGINE: engine::GeneralPurpose =
113 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
114 ///
115 /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
116 #[cfg(any(feature = "alloc", feature = "std", test))]
117 fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
118 let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding())
119 .expect("integer overflow when calculating buffer size");
120 let mut buf = vec![0; encoded_size];
121
122 encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size);
123
124 String::from_utf8(buf).expect("Invalid UTF8")
125 }
126
127 /// Encode arbitrary octets as base64 into a supplied `String`.
128 /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
129 ///
130 /// # Example
131 ///
132 /// ```rust
133 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
134 /// const CUSTOM_ENGINE: engine::GeneralPurpose =
135 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
136 ///
137 /// fn main() {
138 /// let mut buf = String::new();
139 /// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
140 /// println!("{}", buf);
141 ///
142 /// buf.clear();
143 /// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
144 /// println!("{}", buf);
145 /// }
146 /// ```
147 #[cfg(any(feature = "alloc", feature = "std", test))]
148 fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
149 let input_bytes = input.as_ref();
150
151 {
152 let mut sink = chunked_encoder::StringSink::new(output_buf);
153
154 chunked_encoder::ChunkedEncoder::new(self)
155 .encode(input_bytes, &mut sink)
156 .expect("Writing to a String shouldn't fail");
157 }
158 }
159
160 /// Encode arbitrary octets as base64 into a supplied slice.
161 /// Writes into the supplied output buffer.
162 ///
163 /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
164 /// or statically-allocated buffer).
165 ///
166 /// # Example
167 ///
168 /// ```rust
169 /// use base64::{Engine as _, engine::general_purpose};
170 /// let s = b"hello internet!";
171 /// let mut buf = Vec::new();
172 /// // make sure we'll have a slice big enough for base64 + padding
173 /// buf.resize(s.len() * 4 / 3 + 4, 0);
174 ///
175 /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
176 ///
177 /// // shorten our vec down to just what was written
178 /// buf.truncate(bytes_written);
179 ///
180 /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
181 /// ```
182 fn encode_slice<T: AsRef<[u8]>>(
183 &self,
184 input: T,
185 output_buf: &mut [u8],
186 ) -> Result<usize, EncodeSliceError> {
187 let input_bytes = input.as_ref();
188
189 let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding())
190 .expect("usize overflow when calculating buffer size");
191
192 if output_buf.len() < encoded_size {
193 return Err(EncodeSliceError::OutputSliceTooSmall);
194 }
195
196 let b64_output = &mut output_buf[0..encoded_size];
197
198 encode_with_padding(input_bytes, b64_output, self, encoded_size);
199
200 Ok(encoded_size)
201 }
202
203 /// Decode the input into a new `Vec`.
204 ///
205 /// # Example
206 ///
207 /// ```rust
208 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
209 ///
210 /// let bytes = general_purpose::STANDARD
211 /// .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
212 /// println!("{:?}", bytes);
213 ///
214 /// // custom engine setup
215 /// let bytes_url = engine::GeneralPurpose::new(
216 /// &alphabet::URL_SAFE,
217 /// general_purpose::NO_PAD)
218 /// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
219 /// println!("{:?}", bytes_url);
220 /// ```
221 #[cfg(any(feature = "alloc", feature = "std", test))]
222 fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
223 let input_bytes = input.as_ref();
224
225 let estimate = self.internal_decoded_len_estimate(input_bytes.len());
226 let mut buffer = vec![0; estimate.decoded_len_estimate()];
227
228 let bytes_written = self
229 .internal_decode(input_bytes, &mut buffer, estimate)?
230 .decoded_len;
231 buffer.truncate(bytes_written);
232
233 Ok(buffer)
234 }
235
236 /// Decode the `input` into the supplied `buffer`.
237 ///
238 /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
239 /// Returns a `Result` containing an empty tuple, aka `()`.
240 ///
241 /// # Example
242 ///
243 /// ```rust
244 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
245 /// const CUSTOM_ENGINE: engine::GeneralPurpose =
246 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
247 ///
248 /// fn main() {
249 /// use base64::Engine;
250 /// let mut buffer = Vec::<u8>::new();
251 /// // with the default engine
252 /// general_purpose::STANDARD
253 /// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
254 /// println!("{:?}", buffer);
255 ///
256 /// buffer.clear();
257 ///
258 /// // with a custom engine
259 /// CUSTOM_ENGINE.decode_vec(
260 /// "aGVsbG8gaW50ZXJuZXR-Cg==",
261 /// &mut buffer,
262 /// ).unwrap();
263 /// println!("{:?}", buffer);
264 /// }
265 /// ```
266 #[cfg(any(feature = "alloc", feature = "std", test))]
267 fn decode_vec<T: AsRef<[u8]>>(
268 &self,
269 input: T,
270 buffer: &mut Vec<u8>,
271 ) -> Result<(), DecodeError> {
272 let input_bytes = input.as_ref();
273
274 let starting_output_len = buffer.len();
275
276 let estimate = self.internal_decoded_len_estimate(input_bytes.len());
277 let total_len_estimate = estimate
278 .decoded_len_estimate()
279 .checked_add(starting_output_len)
280 .expect("Overflow when calculating output buffer length");
281 buffer.resize(total_len_estimate, 0);
282
283 let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
284 let bytes_written = self
285 .internal_decode(input_bytes, buffer_slice, estimate)?
286 .decoded_len;
287
288 buffer.truncate(starting_output_len + bytes_written);
289
290 Ok(())
291 }
292
293 /// Decode the input into the provided output slice.
294 ///
295 /// Returns the number of bytes written to the slice, or an error if `output` is smaller than
296 /// the estimated decoded length.
297 ///
298 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
299 ///
300 /// See [crate::decoded_len_estimate] for calculating buffer sizes.
301 ///
302 /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
303 /// if the output buffer is too small.
304 fn decode_slice<T: AsRef<[u8]>>(
305 &self,
306 input: T,
307 output: &mut [u8],
308 ) -> Result<usize, DecodeSliceError> {
309 let input_bytes = input.as_ref();
310
311 let estimate = self.internal_decoded_len_estimate(input_bytes.len());
312 if output.len() < estimate.decoded_len_estimate() {
313 return Err(DecodeSliceError::OutputSliceTooSmall);
314 }
315
316 self.internal_decode(input_bytes, output, estimate)
317 .map_err(|e| e.into())
318 .map(|dm| dm.decoded_len)
319 }
320
321 /// Decode the input into the provided output slice.
322 ///
323 /// Returns the number of bytes written to the slice.
324 ///
325 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
326 ///
327 /// See [crate::decoded_len_estimate] for calculating buffer sizes.
328 ///
329 /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
330 /// buffer is too small.
331 ///
332 /// # Panics
333 ///
334 /// Panics if the provided output buffer is too small for the decoded data.
335 fn decode_slice_unchecked<T: AsRef<[u8]>>(
336 &self,
337 input: T,
338 output: &mut [u8],
339 ) -> Result<usize, DecodeError> {
340 let input_bytes = input.as_ref();
341
342 self.internal_decode(
343 input_bytes,
344 output,
345 self.internal_decoded_len_estimate(input_bytes.len()),
346 )
347 .map(|dm| dm.decoded_len)
348 }
349}
350
/// Baseline configuration that every engine has to expose.
pub trait Config {
    /// Whether padding should be appended after the encoded output (`true` means it should).
    ///
    /// Padding is applied outside of the engine's own encode step: an engine may be asked to
    /// encode just one chunk of a larger output, so it cannot always tell when the output is
    /// finished and padding (if configured) is due.
    // Passing this as a separate argument at encode time was an option, but that feels like
    // leaking an implementation detail to the user; handing over a single thing (the engine)
    // everywhere in the API is hopefully more convenient.
    fn encode_padding(&self) -> bool;
}
363
/// An engine implementation's estimate of how large decoded output will be.
///
/// This exists for engine implementors only; users of the library do not need to touch it.
///
/// An implementation may stash whatever it computes while building the estimate so that the
/// same values don't have to be recalculated when the actual decoding happens.
pub trait DecodeEstimate {
    /// A conservative estimate (too big rather than too small) of the decoded length, suitable
    /// for sizing buffers ahead of time and similar uses.
    ///
    /// The estimate must not exceed the next largest complete triple of decoded bytes; in other
    /// words, the final quad of tokens may be assumed to be complete, with no padding.
    fn decoded_len_estimate(&self) -> usize;
}
377
/// Selects how padding (`=`) bytes in the input are treated during decoding.
///
/// Every [Engine] has to support at least [DecodePaddingMode::RequireCanonical]; support for
/// the remaining modes is optional.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Accept canonical padding, and also accept any smaller number of padding bytes.
    Indifferent,
    /// Insist on canonical padding: 0, 1, or 2 `=` as needed to produce a 4 byte suffix.
    RequireCanonical,
    /// Insist on no padding at all -- predictable padding without any wasted bytes.
    RequireNone,
}
391
/// Describes the outcome of a single decode operation.
#[derive(Debug, PartialEq, Eq)]
pub struct DecodeMetadata {
    /// How many decoded bytes were written to the output
    pub(crate) decoded_len: usize,
    /// Where in the input the first padding byte sits, or `None` if there was no padding
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the decoded byte count and the optional offset of the first
    /// padding byte in the input.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        DecodeMetadata {
            decoded_len,
            padding_offset,
        }
    }
}
409