1 | //! Provides the [Engine] abstraction and out of the box implementations. |
2 | #[cfg (any(feature = "alloc" , test))] |
3 | use crate::chunked_encoder; |
4 | use crate::{ |
5 | encode::{encode_with_padding, EncodeSliceError}, |
6 | encoded_len, DecodeError, DecodeSliceError, |
7 | }; |
8 | #[cfg (any(feature = "alloc" , test))] |
9 | use alloc::vec::Vec; |
10 | |
11 | #[cfg (any(feature = "alloc" , test))] |
12 | use alloc::{string::String, vec}; |
13 | |
14 | pub mod general_purpose; |
15 | |
16 | #[cfg (test)] |
17 | mod naive; |
18 | |
19 | #[cfg (test)] |
20 | mod tests; |
21 | |
22 | pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig}; |
23 | |
24 | /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this. |
25 | /// |
26 | /// Different implementations offer different characteristics. The library currently ships with |
27 | /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices |
28 | /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed. |
29 | /// |
30 | /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's |
31 | /// recommended to store the engine in a `const` so that references to it won't pose any lifetime |
32 | /// issues, and to avoid repeating the cost of engine setup. |
33 | /// |
34 | /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden. |
35 | // When adding an implementation of Engine, include them in the engine test suite: |
36 | // - add an implementation of [engine::tests::EngineWrapper] |
37 | // - add the implementation to the `all_engines` macro |
38 | // All tests run on all engines listed in the macro. |
39 | pub trait Engine: Send + Sync { |
40 | /// The config type used by this engine |
41 | type Config: Config; |
42 | /// The decode estimate used by this engine |
43 | type DecodeEstimate: DecodeEstimate; |
44 | |
45 | /// This is not meant to be called directly; it is only for `Engine` implementors. |
46 | /// See the other `encode*` functions on this trait. |
47 | /// |
48 | /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`. |
49 | /// |
50 | /// `output` will be long enough to hold the encoded data. |
51 | /// |
52 | /// Returns the number of bytes written. |
53 | /// |
54 | /// No padding should be written; that is handled separately. |
55 | /// |
56 | /// Must not write any bytes into the output slice other than the encoded data. |
57 | #[doc (hidden)] |
58 | fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize; |
59 | |
60 | /// This is not meant to be called directly; it is only for `Engine` implementors. |
61 | /// |
62 | /// As an optimization to prevent the decoded length from being calculated twice, it is |
63 | /// sometimes helpful to have a conservative estimate of the decoded size before doing the |
64 | /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. |
65 | #[doc (hidden)] |
66 | fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; |
67 | |
68 | /// This is not meant to be called directly; it is only for `Engine` implementors. |
69 | /// See the other `decode*` functions on this trait. |
70 | /// |
71 | /// Decode `input` base64 bytes into the `output` buffer. |
72 | /// |
73 | /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid |
74 | /// calculating it again (expensive on short inputs).` |
75 | /// |
76 | /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this |
77 | /// function must also handle the final possibly partial chunk. |
78 | /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, |
79 | /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the |
80 | /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). |
81 | /// |
82 | /// Decoding must not write any bytes into the output slice other than the decoded data. |
83 | /// |
84 | /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as |
85 | /// errors unless the engine is configured otherwise. |
86 | #[doc (hidden)] |
87 | fn internal_decode( |
88 | &self, |
89 | input: &[u8], |
90 | output: &mut [u8], |
91 | decode_estimate: Self::DecodeEstimate, |
92 | ) -> Result<DecodeMetadata, DecodeSliceError>; |
93 | |
94 | /// Returns the config for this engine. |
95 | fn config(&self) -> &Self::Config; |
96 | |
97 | /// Encode arbitrary octets as base64 using the provided `Engine`. |
98 | /// Returns a `String`. |
99 | /// |
100 | /// # Example |
101 | /// |
102 | /// ```rust |
103 | /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; |
104 | /// |
105 | /// let b64 = general_purpose::STANDARD.encode(b"hello world~" ); |
106 | /// println!("{}" , b64); |
107 | /// |
108 | /// const CUSTOM_ENGINE: engine::GeneralPurpose = |
109 | /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); |
110 | /// |
111 | /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~" ); |
112 | /// ``` |
113 | #[cfg (any(feature = "alloc" , test))] |
114 | #[inline ] |
115 | fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { |
116 | fn inner<E>(engine: &E, input_bytes: &[u8]) -> String |
117 | where |
118 | E: Engine + ?Sized, |
119 | { |
120 | let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) |
121 | .expect("integer overflow when calculating buffer size" ); |
122 | |
123 | let mut buf = vec![0; encoded_size]; |
124 | |
125 | encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size); |
126 | |
127 | String::from_utf8(buf).expect("Invalid UTF8" ) |
128 | } |
129 | |
130 | inner(self, input.as_ref()) |
131 | } |
132 | |
133 | /// Encode arbitrary octets as base64 into a supplied `String`. |
134 | /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough. |
135 | /// |
136 | /// # Example |
137 | /// |
138 | /// ```rust |
139 | /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; |
140 | /// const CUSTOM_ENGINE: engine::GeneralPurpose = |
141 | /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); |
142 | /// |
143 | /// fn main() { |
144 | /// let mut buf = String::new(); |
145 | /// general_purpose::STANDARD.encode_string(b"hello world~" , &mut buf); |
146 | /// println!("{}" , buf); |
147 | /// |
148 | /// buf.clear(); |
149 | /// CUSTOM_ENGINE.encode_string(b"hello internet~" , &mut buf); |
150 | /// println!("{}" , buf); |
151 | /// } |
152 | /// ``` |
153 | #[cfg (any(feature = "alloc" , test))] |
154 | #[inline ] |
155 | fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { |
156 | fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String) |
157 | where |
158 | E: Engine + ?Sized, |
159 | { |
160 | let mut sink = chunked_encoder::StringSink::new(output_buf); |
161 | |
162 | chunked_encoder::ChunkedEncoder::new(engine) |
163 | .encode(input_bytes, &mut sink) |
164 | .expect("Writing to a String shouldn't fail" ); |
165 | } |
166 | |
167 | inner(self, input.as_ref(), output_buf) |
168 | } |
169 | |
170 | /// Encode arbitrary octets as base64 into a supplied slice. |
171 | /// Writes into the supplied output buffer. |
172 | /// |
173 | /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident |
174 | /// or statically-allocated buffer). |
175 | /// |
176 | /// # Example |
177 | /// |
178 | #[cfg_attr (feature = "alloc" , doc = "```" )] |
179 | #[cfg_attr (not(feature = "alloc" ), doc = "```ignore" )] |
180 | /// use base64::{Engine as _, engine::general_purpose}; |
181 | /// let s = b"hello internet!" ; |
182 | /// let mut buf = Vec::new(); |
183 | /// // make sure we'll have a slice big enough for base64 + padding |
184 | /// buf.resize(s.len() * 4 / 3 + 4, 0); |
185 | /// |
186 | /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap(); |
187 | /// |
188 | /// // shorten our vec down to just what was written |
189 | /// buf.truncate(bytes_written); |
190 | /// |
191 | /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); |
192 | /// ``` |
193 | #[inline ] |
194 | fn encode_slice<T: AsRef<[u8]>>( |
195 | &self, |
196 | input: T, |
197 | output_buf: &mut [u8], |
198 | ) -> Result<usize, EncodeSliceError> { |
199 | fn inner<E>( |
200 | engine: &E, |
201 | input_bytes: &[u8], |
202 | output_buf: &mut [u8], |
203 | ) -> Result<usize, EncodeSliceError> |
204 | where |
205 | E: Engine + ?Sized, |
206 | { |
207 | let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) |
208 | .expect("usize overflow when calculating buffer size" ); |
209 | |
210 | if output_buf.len() < encoded_size { |
211 | return Err(EncodeSliceError::OutputSliceTooSmall); |
212 | } |
213 | |
214 | let b64_output = &mut output_buf[0..encoded_size]; |
215 | |
216 | encode_with_padding(input_bytes, b64_output, engine, encoded_size); |
217 | |
218 | Ok(encoded_size) |
219 | } |
220 | |
221 | inner(self, input.as_ref(), output_buf) |
222 | } |
223 | |
224 | /// Decode the input into a new `Vec`. |
225 | /// |
226 | /// # Example |
227 | /// |
228 | /// ```rust |
229 | /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; |
230 | /// |
231 | /// let bytes = general_purpose::STANDARD |
232 | /// .decode("aGVsbG8gd29ybGR+Cg==" ).unwrap(); |
233 | /// println!("{:?}" , bytes); |
234 | /// |
235 | /// // custom engine setup |
236 | /// let bytes_url = engine::GeneralPurpose::new( |
237 | /// &alphabet::URL_SAFE, |
238 | /// general_purpose::NO_PAD) |
239 | /// .decode("aGVsbG8gaW50ZXJuZXR-Cg" ).unwrap(); |
240 | /// println!("{:?}" , bytes_url); |
241 | /// ``` |
242 | #[cfg (any(feature = "alloc" , test))] |
243 | #[inline ] |
244 | fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { |
245 | fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError> |
246 | where |
247 | E: Engine + ?Sized, |
248 | { |
249 | let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); |
250 | let mut buffer = vec![0; estimate.decoded_len_estimate()]; |
251 | |
252 | let bytes_written = engine |
253 | .internal_decode(input_bytes, &mut buffer, estimate) |
254 | .map_err(|e| match e { |
255 | DecodeSliceError::DecodeError(e) => e, |
256 | DecodeSliceError::OutputSliceTooSmall => { |
257 | unreachable!("Vec is sized conservatively" ) |
258 | } |
259 | })? |
260 | .decoded_len; |
261 | |
262 | buffer.truncate(bytes_written); |
263 | |
264 | Ok(buffer) |
265 | } |
266 | |
267 | inner(self, input.as_ref()) |
268 | } |
269 | |
270 | /// Decode the `input` into the supplied `buffer`. |
271 | /// |
272 | /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. |
273 | /// Returns a `Result` containing an empty tuple, aka `()`. |
274 | /// |
275 | /// # Example |
276 | /// |
277 | /// ```rust |
278 | /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; |
279 | /// const CUSTOM_ENGINE: engine::GeneralPurpose = |
280 | /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD); |
281 | /// |
282 | /// fn main() { |
283 | /// use base64::Engine; |
284 | /// let mut buffer = Vec::<u8>::new(); |
285 | /// // with the default engine |
286 | /// general_purpose::STANDARD |
287 | /// .decode_vec("aGVsbG8gd29ybGR+Cg==" , &mut buffer,).unwrap(); |
288 | /// println!("{:?}" , buffer); |
289 | /// |
290 | /// buffer.clear(); |
291 | /// |
292 | /// // with a custom engine |
293 | /// CUSTOM_ENGINE.decode_vec( |
294 | /// "aGVsbG8gaW50ZXJuZXR-Cg==" , |
295 | /// &mut buffer, |
296 | /// ).unwrap(); |
297 | /// println!("{:?}" , buffer); |
298 | /// } |
299 | /// ``` |
300 | #[cfg (any(feature = "alloc" , test))] |
301 | #[inline ] |
302 | fn decode_vec<T: AsRef<[u8]>>( |
303 | &self, |
304 | input: T, |
305 | buffer: &mut Vec<u8>, |
306 | ) -> Result<(), DecodeError> { |
307 | fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError> |
308 | where |
309 | E: Engine + ?Sized, |
310 | { |
311 | let starting_output_len = buffer.len(); |
312 | let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); |
313 | |
314 | let total_len_estimate = estimate |
315 | .decoded_len_estimate() |
316 | .checked_add(starting_output_len) |
317 | .expect("Overflow when calculating output buffer length" ); |
318 | |
319 | buffer.resize(total_len_estimate, 0); |
320 | |
321 | let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; |
322 | |
323 | let bytes_written = engine |
324 | .internal_decode(input_bytes, buffer_slice, estimate) |
325 | .map_err(|e| match e { |
326 | DecodeSliceError::DecodeError(e) => e, |
327 | DecodeSliceError::OutputSliceTooSmall => { |
328 | unreachable!("Vec is sized conservatively" ) |
329 | } |
330 | })? |
331 | .decoded_len; |
332 | |
333 | buffer.truncate(starting_output_len + bytes_written); |
334 | |
335 | Ok(()) |
336 | } |
337 | |
338 | inner(self, input.as_ref(), buffer) |
339 | } |
340 | |
341 | /// Decode the input into the provided output slice. |
342 | /// |
343 | /// Returns the number of bytes written to the slice, or an error if `output` is smaller than |
344 | /// the estimated decoded length. |
345 | /// |
346 | /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). |
347 | /// |
348 | /// See [crate::decoded_len_estimate] for calculating buffer sizes. |
349 | /// |
350 | /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error |
351 | /// if the output buffer is too small. |
352 | #[inline ] |
353 | fn decode_slice<T: AsRef<[u8]>>( |
354 | &self, |
355 | input: T, |
356 | output: &mut [u8], |
357 | ) -> Result<usize, DecodeSliceError> { |
358 | fn inner<E>( |
359 | engine: &E, |
360 | input_bytes: &[u8], |
361 | output: &mut [u8], |
362 | ) -> Result<usize, DecodeSliceError> |
363 | where |
364 | E: Engine + ?Sized, |
365 | { |
366 | engine |
367 | .internal_decode( |
368 | input_bytes, |
369 | output, |
370 | engine.internal_decoded_len_estimate(input_bytes.len()), |
371 | ) |
372 | .map(|dm| dm.decoded_len) |
373 | } |
374 | |
375 | inner(self, input.as_ref(), output) |
376 | } |
377 | |
378 | /// Decode the input into the provided output slice. |
379 | /// |
380 | /// Returns the number of bytes written to the slice. |
381 | /// |
382 | /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). |
383 | /// |
384 | /// See [crate::decoded_len_estimate] for calculating buffer sizes. |
385 | /// |
386 | /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output |
387 | /// buffer is too small. |
388 | /// |
389 | /// # Panics |
390 | /// |
391 | /// Panics if the provided output buffer is too small for the decoded data. |
392 | #[inline ] |
393 | fn decode_slice_unchecked<T: AsRef<[u8]>>( |
394 | &self, |
395 | input: T, |
396 | output: &mut [u8], |
397 | ) -> Result<usize, DecodeError> { |
398 | fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> |
399 | where |
400 | E: Engine + ?Sized, |
401 | { |
402 | engine |
403 | .internal_decode( |
404 | input_bytes, |
405 | output, |
406 | engine.internal_decoded_len_estimate(input_bytes.len()), |
407 | ) |
408 | .map(|dm| dm.decoded_len) |
409 | .map_err(|e| match e { |
410 | DecodeSliceError::DecodeError(e) => e, |
411 | DecodeSliceError::OutputSliceTooSmall => { |
412 | panic!("Output slice is too small" ) |
413 | } |
414 | }) |
415 | } |
416 | |
417 | inner(self, input.as_ref(), output) |
418 | } |
419 | } |
420 | |
/// The minimal level of configuration that engines must support.
///
/// An [Engine] exposes its configuration through [Engine::config()].
pub trait Config {
    /// Returns `true` if padding should be added after the encoded output.
    ///
    /// Padding is added outside the engine's encode() since the engine may be used
    /// to encode only a chunk of the overall output, so it can't always know when
    /// the output is "done" and would therefore need padding (if configured).
    ///
    /// The `encode` and `encode_slice` helpers on [Engine] pass this value along when
    /// computing the encoded length of their input.
    // It could be provided as a separate parameter when encoding, but that feels like
    // leaking an implementation detail to the user, and it's hopefully more convenient
    // to have to only pass one thing (the engine) to any part of the API.
    fn encode_padding(&self) -> bool;
}
433 | |
/// The decode estimate used by an engine implementation. Users do not need to interact with this;
/// it is only for engine implementors.
///
/// Implementors may store relevant data here when constructing this to avoid having to calculate
/// them again during actual decoding.
///
/// Values of this type are produced by [Engine::internal_decoded_len_estimate()] and handed back
/// to [Engine::internal_decode()].
pub trait DecodeEstimate {
    /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
    /// for pre-allocating buffers, etc.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
    ///
    /// The `decode` and `decode_vec` helpers on [Engine] size their output `Vec` from this
    /// value before decoding.
    fn decoded_len_estimate(&self) -> usize;
}
447 | |
/// Selects how pad bytes are treated while decoding.
///
/// Every [Engine] is required to support at least the behavior described by
/// [DecodePaddingMode::RequireCanonical]; supporting the other modes is optional.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Canonical padding is accepted, and so is any smaller number of padding bytes.
    Indifferent,
    /// Only canonical padding is accepted (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// No padding may be present -- for when you want predictable padding, without any wasted bytes.
    RequireNone,
}
461 | |
/// Describes the outcome of a single decode operation.
#[derive(Debug, PartialEq, Eq)]
pub struct DecodeMetadata {
    /// How many decoded bytes were written to the output
    pub(crate) decoded_len: usize,
    /// Where in the input the first padding byte was found, or `None` if there was none
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Bundles the decoded byte count and the optional offset of the first padding byte.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        DecodeMetadata {
            decoded_len,
            padding_offset,
        }
    }
}
479 | |