| 1 | //! Provides the [Engine] abstraction and out of the box implementations. |
| 2 | #[cfg (any(feature = "alloc" , test))] |
| 3 | use crate::chunked_encoder; |
| 4 | use crate::{ |
| 5 | encode::{encode_with_padding, EncodeSliceError}, |
| 6 | encoded_len, DecodeError, DecodeSliceError, |
| 7 | }; |
| 8 | #[cfg (any(feature = "alloc" , test))] |
| 9 | use alloc::vec::Vec; |
| 10 | |
| 11 | #[cfg (any(feature = "alloc" , test))] |
| 12 | use alloc::{string::String, vec}; |
| 13 | |
| 14 | pub mod general_purpose; |
| 15 | |
| 16 | #[cfg (test)] |
| 17 | mod naive; |
| 18 | |
| 19 | #[cfg (test)] |
| 20 | mod tests; |
| 21 | |
| 22 | pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig}; |
| 23 | |
| 24 | /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this. |
| 25 | /// |
| 26 | /// Different implementations offer different characteristics. The library currently ships with |
| 27 | /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices |
| 28 | /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed. |
| 29 | /// |
| 30 | /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's |
| 31 | /// recommended to store the engine in a `const` so that references to it won't pose any lifetime |
| 32 | /// issues, and to avoid repeating the cost of engine setup. |
| 33 | /// |
| 34 | /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden. |
| 35 | // When adding an implementation of Engine, include them in the engine test suite: |
| 36 | // - add an implementation of [engine::tests::EngineWrapper] |
| 37 | // - add the implementation to the `all_engines` macro |
| 38 | // All tests run on all engines listed in the macro. |
| 39 | pub trait Engine: Send + Sync { |
| 40 | /// The config type used by this engine |
| 41 | type Config: Config; |
| 42 | /// The decode estimate used by this engine |
| 43 | type DecodeEstimate: DecodeEstimate; |
| 44 | |
| 45 | /// This is not meant to be called directly; it is only for `Engine` implementors. |
| 46 | /// See the other `encode*` functions on this trait. |
| 47 | /// |
| 48 | /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`. |
| 49 | /// |
| 50 | /// `output` will be long enough to hold the encoded data. |
| 51 | /// |
| 52 | /// Returns the number of bytes written. |
| 53 | /// |
| 54 | /// No padding should be written; that is handled separately. |
| 55 | /// |
| 56 | /// Must not write any bytes into the output slice other than the encoded data. |
| 57 | #[doc (hidden)] |
| 58 | fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize; |
| 59 | |
| 60 | /// This is not meant to be called directly; it is only for `Engine` implementors. |
| 61 | /// |
| 62 | /// As an optimization to prevent the decoded length from being calculated twice, it is |
| 63 | /// sometimes helpful to have a conservative estimate of the decoded size before doing the |
| 64 | /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. |
| 65 | #[doc (hidden)] |
| 66 | fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; |
| 67 | |
| 68 | /// This is not meant to be called directly; it is only for `Engine` implementors. |
| 69 | /// See the other `decode*` functions on this trait. |
| 70 | /// |
| 71 | /// Decode `input` base64 bytes into the `output` buffer. |
| 72 | /// |
| 73 | /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid |
| 74 | /// calculating it again (expensive on short inputs).` |
| 75 | /// |
| 76 | /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this |
| 77 | /// function must also handle the final possibly partial chunk. |
| 78 | /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, |
| 79 | /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the |
| 80 | /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). |
| 81 | /// |
| 82 | /// Decoding must not write any bytes into the output slice other than the decoded data. |
| 83 | /// |
| 84 | /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as |
| 85 | /// errors unless the engine is configured otherwise. |
| 86 | /// |
| 87 | /// # Panics |
| 88 | /// |
| 89 | /// Panics if `output` is too small. |
| 90 | #[doc (hidden)] |
| 91 | fn internal_decode( |
| 92 | &self, |
| 93 | input: &[u8], |
| 94 | output: &mut [u8], |
| 95 | decode_estimate: Self::DecodeEstimate, |
| 96 | ) -> Result<DecodeMetadata, DecodeError>; |
| 97 | |
| 98 | /// Returns the config for this engine. |
| 99 | fn config(&self) -> &Self::Config; |
| 100 | |
| 101 | /// Encode arbitrary octets as base64 using the provided `Engine`. |
| 102 | /// Returns a `String`. |
| 103 | /// |
| 104 | /// # Example |
| 105 | /// |
| 106 | /// ```rust |
| 107 | /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; |
| 108 | /// |
| 109 | /// let b64 = general_purpose::STANDARD.encode(b"hello world~" ); |
| 110 | /// println!("{}" , b64); |
| 111 | /// |
| 112 | /// const CUSTOM_ENGINE: engine::GeneralPurpose = |
| 113 | /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); |
| 114 | /// |
| 115 | /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~" ); |
| 116 | #[cfg (any(feature = "alloc" , test))] |
| 117 | #[inline ] |
| 118 | fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { |
| 119 | fn inner<E>(engine: &E, input_bytes: &[u8]) -> String |
| 120 | where |
| 121 | E: Engine + ?Sized, |
| 122 | { |
| 123 | let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) |
| 124 | .expect("integer overflow when calculating buffer size" ); |
| 125 | |
| 126 | let mut buf = vec![0; encoded_size]; |
| 127 | |
| 128 | encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size); |
| 129 | |
| 130 | String::from_utf8(buf).expect("Invalid UTF8" ) |
| 131 | } |
| 132 | |
| 133 | inner(self, input.as_ref()) |
| 134 | } |
| 135 | |
| 136 | /// Encode arbitrary octets as base64 into a supplied `String`. |
| 137 | /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough. |
| 138 | /// |
| 139 | /// # Example |
| 140 | /// |
| 141 | /// ```rust |
| 142 | /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; |
| 143 | /// const CUSTOM_ENGINE: engine::GeneralPurpose = |
| 144 | /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); |
| 145 | /// |
| 146 | /// fn main() { |
| 147 | /// let mut buf = String::new(); |
| 148 | /// general_purpose::STANDARD.encode_string(b"hello world~" , &mut buf); |
| 149 | /// println!("{}" , buf); |
| 150 | /// |
| 151 | /// buf.clear(); |
| 152 | /// CUSTOM_ENGINE.encode_string(b"hello internet~" , &mut buf); |
| 153 | /// println!("{}" , buf); |
| 154 | /// } |
| 155 | /// ``` |
| 156 | #[cfg (any(feature = "alloc" , test))] |
| 157 | #[inline ] |
| 158 | fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { |
| 159 | fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String) |
| 160 | where |
| 161 | E: Engine + ?Sized, |
| 162 | { |
| 163 | let mut sink = chunked_encoder::StringSink::new(output_buf); |
| 164 | |
| 165 | chunked_encoder::ChunkedEncoder::new(engine) |
| 166 | .encode(input_bytes, &mut sink) |
| 167 | .expect("Writing to a String shouldn't fail" ); |
| 168 | } |
| 169 | |
| 170 | inner(self, input.as_ref(), output_buf) |
| 171 | } |
| 172 | |
| 173 | /// Encode arbitrary octets as base64 into a supplied slice. |
| 174 | /// Writes into the supplied output buffer. |
| 175 | /// |
| 176 | /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident |
| 177 | /// or statically-allocated buffer). |
| 178 | /// |
| 179 | /// # Example |
| 180 | /// |
| 181 | #[cfg_attr (feature = "alloc" , doc = "```" )] |
| 182 | #[cfg_attr (not(feature = "alloc" ), doc = "```ignore" )] |
| 183 | /// use base64::{Engine as _, engine::general_purpose}; |
| 184 | /// let s = b"hello internet!" ; |
| 185 | /// let mut buf = Vec::new(); |
| 186 | /// // make sure we'll have a slice big enough for base64 + padding |
| 187 | /// buf.resize(s.len() * 4 / 3 + 4, 0); |
| 188 | /// |
| 189 | /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap(); |
| 190 | /// |
| 191 | /// // shorten our vec down to just what was written |
| 192 | /// buf.truncate(bytes_written); |
| 193 | /// |
| 194 | /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); |
| 195 | /// ``` |
| 196 | #[inline ] |
| 197 | fn encode_slice<T: AsRef<[u8]>>( |
| 198 | &self, |
| 199 | input: T, |
| 200 | output_buf: &mut [u8], |
| 201 | ) -> Result<usize, EncodeSliceError> { |
| 202 | fn inner<E>( |
| 203 | engine: &E, |
| 204 | input_bytes: &[u8], |
| 205 | output_buf: &mut [u8], |
| 206 | ) -> Result<usize, EncodeSliceError> |
| 207 | where |
| 208 | E: Engine + ?Sized, |
| 209 | { |
| 210 | let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) |
| 211 | .expect("usize overflow when calculating buffer size" ); |
| 212 | |
| 213 | if output_buf.len() < encoded_size { |
| 214 | return Err(EncodeSliceError::OutputSliceTooSmall); |
| 215 | } |
| 216 | |
| 217 | let b64_output = &mut output_buf[0..encoded_size]; |
| 218 | |
| 219 | encode_with_padding(input_bytes, b64_output, engine, encoded_size); |
| 220 | |
| 221 | Ok(encoded_size) |
| 222 | } |
| 223 | |
| 224 | inner(self, input.as_ref(), output_buf) |
| 225 | } |
| 226 | |
| 227 | /// Decode the input into a new `Vec`. |
| 228 | /// |
| 229 | /// # Example |
| 230 | /// |
| 231 | /// ```rust |
| 232 | /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; |
| 233 | /// |
| 234 | /// let bytes = general_purpose::STANDARD |
| 235 | /// .decode("aGVsbG8gd29ybGR+Cg==" ).unwrap(); |
| 236 | /// println!("{:?}" , bytes); |
| 237 | /// |
| 238 | /// // custom engine setup |
| 239 | /// let bytes_url = engine::GeneralPurpose::new( |
| 240 | /// &alphabet::URL_SAFE, |
| 241 | /// general_purpose::NO_PAD) |
| 242 | /// .decode("aGVsbG8gaW50ZXJuZXR-Cg" ).unwrap(); |
| 243 | /// println!("{:?}" , bytes_url); |
| 244 | /// ``` |
| 245 | #[cfg (any(feature = "alloc" , test))] |
| 246 | #[inline ] |
| 247 | fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { |
| 248 | fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError> |
| 249 | where |
| 250 | E: Engine + ?Sized, |
| 251 | { |
| 252 | let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); |
| 253 | let mut buffer = vec![0; estimate.decoded_len_estimate()]; |
| 254 | |
| 255 | let bytes_written = engine |
| 256 | .internal_decode(input_bytes, &mut buffer, estimate)? |
| 257 | .decoded_len; |
| 258 | |
| 259 | buffer.truncate(bytes_written); |
| 260 | |
| 261 | Ok(buffer) |
| 262 | } |
| 263 | |
| 264 | inner(self, input.as_ref()) |
| 265 | } |
| 266 | |
| 267 | /// Decode the `input` into the supplied `buffer`. |
| 268 | /// |
| 269 | /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. |
| 270 | /// Returns a `Result` containing an empty tuple, aka `()`. |
| 271 | /// |
| 272 | /// # Example |
| 273 | /// |
| 274 | /// ```rust |
| 275 | /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; |
| 276 | /// const CUSTOM_ENGINE: engine::GeneralPurpose = |
| 277 | /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD); |
| 278 | /// |
| 279 | /// fn main() { |
| 280 | /// use base64::Engine; |
| 281 | /// let mut buffer = Vec::<u8>::new(); |
| 282 | /// // with the default engine |
| 283 | /// general_purpose::STANDARD |
| 284 | /// .decode_vec("aGVsbG8gd29ybGR+Cg==" , &mut buffer,).unwrap(); |
| 285 | /// println!("{:?}" , buffer); |
| 286 | /// |
| 287 | /// buffer.clear(); |
| 288 | /// |
| 289 | /// // with a custom engine |
| 290 | /// CUSTOM_ENGINE.decode_vec( |
| 291 | /// "aGVsbG8gaW50ZXJuZXR-Cg==" , |
| 292 | /// &mut buffer, |
| 293 | /// ).unwrap(); |
| 294 | /// println!("{:?}" , buffer); |
| 295 | /// } |
| 296 | /// ``` |
| 297 | #[cfg (any(feature = "alloc" , test))] |
| 298 | #[inline ] |
| 299 | fn decode_vec<T: AsRef<[u8]>>( |
| 300 | &self, |
| 301 | input: T, |
| 302 | buffer: &mut Vec<u8>, |
| 303 | ) -> Result<(), DecodeError> { |
| 304 | fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError> |
| 305 | where |
| 306 | E: Engine + ?Sized, |
| 307 | { |
| 308 | let starting_output_len = buffer.len(); |
| 309 | let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); |
| 310 | |
| 311 | let total_len_estimate = estimate |
| 312 | .decoded_len_estimate() |
| 313 | .checked_add(starting_output_len) |
| 314 | .expect("Overflow when calculating output buffer length" ); |
| 315 | |
| 316 | buffer.resize(total_len_estimate, 0); |
| 317 | |
| 318 | let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; |
| 319 | |
| 320 | let bytes_written = engine |
| 321 | .internal_decode(input_bytes, buffer_slice, estimate)? |
| 322 | .decoded_len; |
| 323 | |
| 324 | buffer.truncate(starting_output_len + bytes_written); |
| 325 | |
| 326 | Ok(()) |
| 327 | } |
| 328 | |
| 329 | inner(self, input.as_ref(), buffer) |
| 330 | } |
| 331 | |
| 332 | /// Decode the input into the provided output slice. |
| 333 | /// |
| 334 | /// Returns the number of bytes written to the slice, or an error if `output` is smaller than |
| 335 | /// the estimated decoded length. |
| 336 | /// |
| 337 | /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). |
| 338 | /// |
| 339 | /// See [crate::decoded_len_estimate] for calculating buffer sizes. |
| 340 | /// |
| 341 | /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error |
| 342 | /// if the output buffer is too small. |
| 343 | #[inline ] |
| 344 | fn decode_slice<T: AsRef<[u8]>>( |
| 345 | &self, |
| 346 | input: T, |
| 347 | output: &mut [u8], |
| 348 | ) -> Result<usize, DecodeSliceError> { |
| 349 | fn inner<E>( |
| 350 | engine: &E, |
| 351 | input_bytes: &[u8], |
| 352 | output: &mut [u8], |
| 353 | ) -> Result<usize, DecodeSliceError> |
| 354 | where |
| 355 | E: Engine + ?Sized, |
| 356 | { |
| 357 | let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); |
| 358 | |
| 359 | if output.len() < estimate.decoded_len_estimate() { |
| 360 | return Err(DecodeSliceError::OutputSliceTooSmall); |
| 361 | } |
| 362 | |
| 363 | engine |
| 364 | .internal_decode(input_bytes, output, estimate) |
| 365 | .map_err(|e| e.into()) |
| 366 | .map(|dm| dm.decoded_len) |
| 367 | } |
| 368 | |
| 369 | inner(self, input.as_ref(), output) |
| 370 | } |
| 371 | |
| 372 | /// Decode the input into the provided output slice. |
| 373 | /// |
| 374 | /// Returns the number of bytes written to the slice. |
| 375 | /// |
| 376 | /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). |
| 377 | /// |
| 378 | /// See [crate::decoded_len_estimate] for calculating buffer sizes. |
| 379 | /// |
| 380 | /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output |
| 381 | /// buffer is too small. |
| 382 | /// |
| 383 | /// # Panics |
| 384 | /// |
| 385 | /// Panics if the provided output buffer is too small for the decoded data. |
| 386 | #[inline ] |
| 387 | fn decode_slice_unchecked<T: AsRef<[u8]>>( |
| 388 | &self, |
| 389 | input: T, |
| 390 | output: &mut [u8], |
| 391 | ) -> Result<usize, DecodeError> { |
| 392 | fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> |
| 393 | where |
| 394 | E: Engine + ?Sized, |
| 395 | { |
| 396 | engine |
| 397 | .internal_decode( |
| 398 | input_bytes, |
| 399 | output, |
| 400 | engine.internal_decoded_len_estimate(input_bytes.len()), |
| 401 | ) |
| 402 | .map(|dm| dm.decoded_len) |
| 403 | } |
| 404 | |
| 405 | inner(self, input.as_ref(), output) |
| 406 | } |
| 407 | } |
| 408 | |
/// The smallest set of configuration options every engine has to expose.
pub trait Config {
    /// Reports whether padding should be appended after the encoded output (`true` if so).
    ///
    /// Padding is applied outside of the engine's own encode step: an engine may be asked to
    /// encode just one chunk of a larger output, so it cannot always tell when the output is
    /// finished and padding (if configured) is due.
    // Padding could instead be a separate parameter on the encode calls, but that would leak
    // an implementation detail to users; passing a single thing (the engine) around the API
    // is hopefully more convenient.
    fn encode_padding(&self) -> bool;
}
| 421 | |
/// An engine implementation's estimate of how long its decoded output will be. This exists
/// only for engine implementors; users of the library do not need to touch it.
///
/// An implementor may stash any data it works out while building the estimate in here, so
/// that the same values need not be computed a second time when the decode actually runs.
pub trait DecodeEstimate {
    /// Returns an estimate of the decoded length, suitable for pre-allocating buffers and the
    /// like. The estimate is conservative: when in doubt it errs towards being too big.
    ///
    /// It must not exceed the next largest complete triple of decoded bytes. Put differently,
    /// the last quad of tokens may be treated as if it were complete and carried no padding.
    fn decoded_len_estimate(&self) -> usize;
}
| 435 | |
/// Selects how padding bytes in the input are treated during decoding.
///
/// Every [Engine] is required to support at least the behavior of
/// [DecodePaddingMode::RequireCanonical]; supporting the other modes is optional.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Accepts canonical padding, and also accepts any smaller number of padding bytes.
    Indifferent,
    /// Insists on canonical padding (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Insists on no padding at all -- for when you want predictable padding, without any
    /// wasted bytes.
    RequireNone,
}
| 449 | |
/// Describes the outcome of a decode operation.
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
    /// How many decoded bytes were output
    pub(crate) decoded_len: usize,
    /// Where in the input the first padding byte sits, if there is one
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the number of decoded bytes and the offset of the first
    /// padding byte in the input (`None` when the input had no padding).
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        DecodeMetadata {
            decoded_len,
            padding_offset,
        }
    }
}
| 467 | |