//! Provides the [GeneralPurpose] engine and associated config types.
use crate::{
    alphabet,
    alphabet::Alphabet,
    engine::{Config, DecodeMetadata, DecodePaddingMode},
    DecodeError,
};
use core::convert::TryInto;

mod decode;
pub(crate) mod decode_suffix;

pub use decode::GeneralPurposeEstimate;

pub(crate) const INVALID_VALUE: u8 = 255;

/// A general-purpose base64 engine.
///
/// - It uses no vector CPU instructions, so it will work on any system.
/// - It is reasonably fast (~2-3 GiB/s).
/// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For
///   loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time
///   implementation.
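///
/// For illustration, a minimal usage sketch; it assumes the crate-level `Engine` trait
/// (defined outside this module) supplies the usual `encode`/`decode` helpers:
///
/// ```
/// use base64::{alphabet, engine::{self, general_purpose}, Engine as _};
///
/// // build an engine from an alphabet and a config, then round-trip some bytes
/// let engine = engine::GeneralPurpose::new(&alphabet::STANDARD, general_purpose::PAD);
/// let encoded = engine.encode(b"hello");
/// assert_eq!(encoded, "aGVsbG8=");
/// assert_eq!(engine.decode(&encoded).unwrap(), b"hello".to_vec());
/// ```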
#[derive(Debug, Clone)]
pub struct GeneralPurpose {
    encode_table: [u8; 64],
    decode_table: [u8; 256],
    config: GeneralPurposeConfig,
}

impl GeneralPurpose {
    /// Create a `GeneralPurpose` engine from an [Alphabet].
    ///
    /// While not very expensive to initialize, ideally these should be cached
    /// if the engine will be used repeatedly.
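    ///
    /// As a sketch, since `new` is `const`, an engine can be built once and reused:
    ///
    /// ```
    /// use base64::{alphabet, engine::{GeneralPurpose, general_purpose::NO_PAD}, Engine as _};
    ///
    /// const URL_SAFE_ENGINE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);
    /// assert_eq!(URL_SAFE_ENGINE.encode(b"hi"), "aGk");
    /// ```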
    pub const fn new(alphabet: &Alphabet, config: GeneralPurposeConfig) -> Self {
        Self {
            encode_table: encode_table(alphabet),
            decode_table: decode_table(alphabet),
            config,
        }
    }
}

impl super::Engine for GeneralPurpose {
    type Config = GeneralPurposeConfig;
    type DecodeEstimate = GeneralPurposeEstimate;

    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
        let mut input_index: usize = 0;

        const BLOCKS_PER_FAST_LOOP: usize = 4;
        const LOW_SIX_BITS: u64 = 0x3F;

        // we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we
        // need 2 trailing bytes to be available to read.
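        // With the constants above that is 4 * 6 + 2 = 26 bytes, so the fast loop only runs
        // while at least 26 unprocessed input bytes remain.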
        let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2);
        let mut output_index = 0;

        if last_fast_index > 0 {
            while input_index <= last_fast_index {
                // Major performance wins from letting the optimizer do the bounds check once,
                // mostly on the output side
                let input_chunk =
                    &input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))];
                let output_chunk =
                    &mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)];

                // Hand-unrolling for 32 vs 16 or 8 bytes yields performance about equivalent to
                // unsafe pointer code on a Xeon E5-1650v3. 64-byte unrolling was slightly better
                // for large inputs but significantly worse for 50-byte input, unsurprisingly. I
                // suspect that it's not an uncommon use case to encode smallish chunks of data
                // (e.g. a 64-byte SHA-512 digest), so it would be nice if that fit in the unrolled
                // loop at least once. Plus, single-digit percentage performance differences might
                // well be quite different on different hardware.

                let input_u64 = read_u64(&input_chunk[0..]);

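                // The u64 is read big-endian, so input byte 0 lands in bits 63..=56. Each 6-bit
                // group is extracted with a shift that steps down by 6 from 58 to 16, covering
                // the top 48 bits (6 bytes); the low 16 bits are the 2 extra bytes, which the
                // next read_u64 call picks up again.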
                output_chunk[0] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
                output_chunk[1] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
                output_chunk[2] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
                output_chunk[3] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
                output_chunk[4] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
                output_chunk[5] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
                output_chunk[6] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
                output_chunk[7] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];

                let input_u64 = read_u64(&input_chunk[6..]);

                output_chunk[8] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
                output_chunk[9] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
                output_chunk[10] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
                output_chunk[11] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
                output_chunk[12] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
                output_chunk[13] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
                output_chunk[14] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
                output_chunk[15] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];

                let input_u64 = read_u64(&input_chunk[12..]);

                output_chunk[16] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
                output_chunk[17] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
                output_chunk[18] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
                output_chunk[19] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
                output_chunk[20] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
                output_chunk[21] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
                output_chunk[22] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
                output_chunk[23] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];

                let input_u64 = read_u64(&input_chunk[18..]);

                output_chunk[24] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize];
                output_chunk[25] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize];
                output_chunk[26] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize];
                output_chunk[27] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize];
                output_chunk[28] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize];
                output_chunk[29] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize];
                output_chunk[30] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize];
                output_chunk[31] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize];

                output_index += BLOCKS_PER_FAST_LOOP * 8;
                input_index += BLOCKS_PER_FAST_LOOP * 6;
            }
        }

        // Encode what's left after the fast loop.

        const LOW_SIX_BITS_U8: u8 = 0x3F;

        let rem = input.len() % 3;
        let start_of_rem = input.len() - rem;

        // start at the first index not handled by fast loop, which may be 0.
        while input_index < start_of_rem {
            let input_chunk = &input[input_index..(input_index + 3)];
            let output_chunk = &mut output[output_index..(output_index + 4)];

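            // Standard 3-byte -> 4-symbol split: top 6 bits of byte 0; low 2 bits of byte 0 plus
            // top 4 bits of byte 1; low 4 bits of byte 1 plus top 2 bits of byte 2; low 6 bits of
            // byte 2.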
            output_chunk[0] = self.encode_table[(input_chunk[0] >> 2) as usize];
            output_chunk[1] = self.encode_table
                [((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize];
            output_chunk[2] = self.encode_table
                [((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize];
            output_chunk[3] = self.encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize];

            input_index += 3;
            output_index += 4;
        }

        if rem == 2 {
            output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize];
            output[output_index + 1] =
                self.encode_table[((input[start_of_rem] << 4 | input[start_of_rem + 1] >> 4)
                    & LOW_SIX_BITS_U8) as usize];
            output[output_index + 2] =
                self.encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize];
            output_index += 3;
        } else if rem == 1 {
            output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize];
            output[output_index + 1] =
                self.encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize];
            output_index += 2;
        }

        output_index
    }

    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
        GeneralPurposeEstimate::new(input_len)
    }

    fn internal_decode(
        &self,
        input: &[u8],
        output: &mut [u8],
        estimate: Self::DecodeEstimate,
    ) -> Result<DecodeMetadata, DecodeError> {
        decode::decode_helper(
            input,
            estimate,
            output,
            &self.decode_table,
            self.config.decode_allow_trailing_bits,
            self.config.decode_padding_mode,
        )
    }

    fn config(&self) -> &Self::Config {
        &self.config
    }
}

/// Returns a table mapping a 6-bit index to the ASCII byte encoding of the index
pub(crate) const fn encode_table(alphabet: &Alphabet) -> [u8; 64] {
    // the encode table is just the alphabet:
    // 6-bit index lookup -> printable byte
    let mut encode_table: [u8; 64] = [0_u8; 64];
    {
        let mut index: usize = 0;
        while index < 64 {
            encode_table[index] = alphabet.symbols[index];
            index += 1;
        }
    }

    encode_table
}

/// Returns a table mapping base64 bytes as the lookup index to either:
/// - [INVALID_VALUE] for bytes that aren't members of the alphabet
/// - a byte whose lower 6 bits are the value that was encoded into the index byte
pub(crate) const fn decode_table(alphabet: &Alphabet) -> [u8; 256] {
    let mut decode_table: [u8; 256] = [INVALID_VALUE; 256];

    // Since the table is full of `INVALID_VALUE` already, we only need to overwrite
    // the parts that are valid.
    let mut index: usize = 0;
    while index < 64 {
        // The index in the alphabet is the 6-bit value we care about.
        // Since the index is in 0-63, it is safe to cast to u8.
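        // e.g. with the STANDARD alphabet this maps b'A' -> 0, b'a' -> 26, and b'/' -> 63.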
        decode_table[alphabet.symbols[index] as usize] = index as u8;
        index += 1;
    }

    decode_table
}

#[inline]
fn read_u64(s: &[u8]) -> u64 {
    u64::from_be_bytes(s[..8].try_into().unwrap())
}

/// Contains configuration parameters for base64 encoding and decoding.
///
/// ```
/// # use base64::engine::GeneralPurposeConfig;
/// let config = GeneralPurposeConfig::new()
///     .with_encode_padding(false);
/// // further customize using `.with_*` methods as needed
/// ```
///
/// The constants [PAD] and [NO_PAD] cover most use cases.
///
/// To specify the characters used, see [Alphabet].
#[derive(Clone, Copy, Debug)]
pub struct GeneralPurposeConfig {
    encode_padding: bool,
    decode_allow_trailing_bits: bool,
    decode_padding_mode: DecodePaddingMode,
}

impl GeneralPurposeConfig {
    /// Create a new config with `padding` = `true`, `decode_allow_trailing_bits` = `false`, and
    /// `decode_padding_mode` = `DecodePaddingMode::RequireCanonical`.
    ///
    /// This probably matches most people's expectations, but consider disabling padding to save
    /// a few bytes unless you specifically need it for compatibility with some legacy system.
    pub const fn new() -> Self {
        Self {
            // RFC states that padding must be applied by default
            encode_padding: true,
            decode_allow_trailing_bits: false,
            decode_padding_mode: DecodePaddingMode::RequireCanonical,
        }
    }

    /// Create a new config based on `self` with an updated `padding` setting.
    ///
    /// If `padding` is `true`, encoding will append either 1 or 2 `=` padding characters as needed
    /// to produce an output whose length is a multiple of 4.
    ///
    /// Padding is not needed for correct decoding and only serves to waste bytes, but it's in the
    /// [spec](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2).
    ///
    /// For new applications, consider not using padding if the decoders you're using don't require
    /// padding to be present.
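    ///
    /// A sketch of the effect, using the [alphabet::STANDARD] alphabet:
    ///
    /// ```
    /// use base64::{alphabet, engine::{GeneralPurpose, GeneralPurposeConfig}, Engine as _};
    ///
    /// let no_pad = GeneralPurposeConfig::new().with_encode_padding(false);
    /// let engine = GeneralPurpose::new(&alphabet::STANDARD, no_pad);
    /// // one input byte encodes to two symbols; without padding no "==" is appended
    /// assert_eq!(engine.encode(b"f"), "Zg");
    /// ```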
    pub const fn with_encode_padding(self, padding: bool) -> Self {
        Self {
            encode_padding: padding,
            ..self
        }
    }

    /// Create a new config based on `self` with an updated `decode_allow_trailing_bits` setting.
    ///
    /// Most users will not need to configure this. It's useful if you need to decode base64
    /// produced by a buggy encoder that has bits set in the unused space on the last base64
    /// character as per [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode).
    /// If invalid trailing bits are present and this is `true`, those bits will
    /// be silently ignored, else `DecodeError::InvalidLastSymbol` will be emitted.
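    ///
    /// A sketch of the difference (the last symbol of `"Zh=="` carries a nonzero unused bit):
    ///
    /// ```
    /// use base64::{alphabet, engine::{GeneralPurpose, GeneralPurposeConfig}, Engine as _};
    ///
    /// let strict = GeneralPurpose::new(&alphabet::STANDARD, GeneralPurposeConfig::new());
    /// assert!(strict.decode("Zh==").is_err());
    ///
    /// let lenient = GeneralPurpose::new(
    ///     &alphabet::STANDARD,
    ///     GeneralPurposeConfig::new().with_decode_allow_trailing_bits(true),
    /// );
    /// // the stray low bits of 'h' are ignored, so this decodes like "Zg=="
    /// assert_eq!(lenient.decode("Zh==").unwrap(), b"f".to_vec());
    /// ```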
    pub const fn with_decode_allow_trailing_bits(self, allow: bool) -> Self {
        Self {
            decode_allow_trailing_bits: allow,
            ..self
        }
    }

    /// Create a new config based on `self` with an updated `decode_padding_mode` setting.
    ///
    /// Padding is not useful in terms of representing encoded data -- it makes no difference to
    /// the decoder if padding is present or not, so if you have some un-padded input to decode, it
    /// is perfectly fine to use `DecodePaddingMode::Indifferent` to prevent errors from being
    /// emitted.
    ///
    /// However, since in practice
    /// [people who learned nothing from BER vs DER seem to expect base64 to have one canonical encoding](https://eprint.iacr.org/2022/361),
    /// the default setting is the stricter `DecodePaddingMode::RequireCanonical`.
    ///
    /// Or, if "canonical" in your circumstance means _no_ padding rather than padding to the
    /// next multiple of four, there's `DecodePaddingMode::RequireNone`.
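    ///
    /// For illustration, a decoder that accepts both padded and unpadded input:
    ///
    /// ```
    /// use base64::alphabet;
    /// use base64::engine::{DecodePaddingMode, GeneralPurpose, GeneralPurposeConfig};
    /// use base64::Engine as _;
    ///
    /// let indifferent = GeneralPurpose::new(
    ///     &alphabet::STANDARD,
    ///     GeneralPurposeConfig::new().with_decode_padding_mode(DecodePaddingMode::Indifferent),
    /// );
    /// assert!(indifferent.decode("Zg==").is_ok());
    /// assert!(indifferent.decode("Zg").is_ok());
    /// ```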
    pub const fn with_decode_padding_mode(self, mode: DecodePaddingMode) -> Self {
        Self {
            decode_padding_mode: mode,
            ..self
        }
    }
}

impl Default for GeneralPurposeConfig {
    /// Delegates to [GeneralPurposeConfig::new].
    fn default() -> Self {
        Self::new()
    }
}

impl Config for GeneralPurposeConfig {
    fn encode_padding(&self) -> bool {
        self.encode_padding
    }
}

/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [PAD] config.
pub const STANDARD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, PAD);

/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [NO_PAD] config.
pub const STANDARD_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD);

/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [PAD] config.
pub const URL_SAFE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, PAD);

/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [NO_PAD] config.
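///
/// For illustration of the difference from [STANDARD]:
///
/// ```
/// use base64::{engine::general_purpose::{STANDARD, URL_SAFE_NO_PAD}, Engine as _};
///
/// // the URL-safe alphabet swaps '+' and '/' for '-' and '_', and NO_PAD drops the '='
/// assert_eq!(STANDARD.encode(&[0xfb, 0xef]), "++8=");
/// assert_eq!(URL_SAFE_NO_PAD.encode(&[0xfb, 0xef]), "--8");
/// ```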
pub const URL_SAFE_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD);

/// Include padding bytes when encoding, and require that they be present when decoding.
///
/// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves
/// little purpose in practice.
pub const PAD: GeneralPurposeConfig = GeneralPurposeConfig::new();

/// Don't add padding when encoding, and require no padding when decoding.
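///
/// A sketch of the decode-side strictness, using [STANDARD_NO_PAD]:
///
/// ```
/// use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
///
/// assert_eq!(STANDARD_NO_PAD.encode(b"f"), "Zg");
/// // padded input is rejected because this config requires that no padding be present
/// assert!(STANDARD_NO_PAD.decode("Zg==").is_err());
/// ```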
pub const NO_PAD: GeneralPurposeConfig = GeneralPurposeConfig::new()
    .with_encode_padding(false)
    .with_decode_padding_mode(DecodePaddingMode::RequireNone);