| 1 | //! Provides [Alphabet] and constants for alphabets commonly used in the wild. |
| 2 | |
| 3 | use crate::PAD_BYTE; |
| 4 | use core::{convert, fmt}; |
| 5 | #[cfg (any(feature = "std" , test))] |
| 6 | use std::error; |
| 7 | |
/// Number of symbols in an [`Alphabet`].
const ALPHABET_SIZE: usize = 64;

/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::alphabet::Alphabet;
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The symbols, stored in the same order as the bytes of the string the alphabet was
    /// built from.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
| 58 | |
| 59 | impl Alphabet { |
| 60 | /// Performs no checks so that it can be const. |
| 61 | /// Used only for known-valid strings. |
| 62 | const fn from_str_unchecked(alphabet: &str) -> Self { |
| 63 | let mut symbols = [0_u8; ALPHABET_SIZE]; |
| 64 | let source_bytes = alphabet.as_bytes(); |
| 65 | |
| 66 | // a way to copy that's allowed in const fn |
| 67 | let mut index = 0; |
| 68 | while index < ALPHABET_SIZE { |
| 69 | symbols[index] = source_bytes[index]; |
| 70 | index += 1; |
| 71 | } |
| 72 | |
| 73 | Self { symbols } |
| 74 | } |
| 75 | |
| 76 | /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. |
| 77 | /// |
| 78 | /// The `=` byte is not allowed as it is used for padding. |
| 79 | pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { |
| 80 | let bytes = alphabet.as_bytes(); |
| 81 | if bytes.len() != ALPHABET_SIZE { |
| 82 | return Err(ParseAlphabetError::InvalidLength); |
| 83 | } |
| 84 | |
| 85 | { |
| 86 | let mut index = 0; |
| 87 | while index < ALPHABET_SIZE { |
| 88 | let byte = bytes[index]; |
| 89 | |
| 90 | // must be ascii printable. 127 (DEL) is commonly considered printable |
| 91 | // for some reason but clearly unsuitable for base64. |
| 92 | if !(byte >= 32_u8 && byte <= 126_u8) { |
| 93 | return Err(ParseAlphabetError::UnprintableByte(byte)); |
| 94 | } |
| 95 | // = is assumed to be padding, so cannot be used as a symbol |
| 96 | if byte == PAD_BYTE { |
| 97 | return Err(ParseAlphabetError::ReservedByte(byte)); |
| 98 | } |
| 99 | |
| 100 | // Check for duplicates while staying within what const allows. |
| 101 | // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit |
| 102 | // microsecond range. |
| 103 | |
| 104 | let mut probe_index = 0; |
| 105 | while probe_index < ALPHABET_SIZE { |
| 106 | if probe_index == index { |
| 107 | probe_index += 1; |
| 108 | continue; |
| 109 | } |
| 110 | |
| 111 | let probe_byte = bytes[probe_index]; |
| 112 | |
| 113 | if byte == probe_byte { |
| 114 | return Err(ParseAlphabetError::DuplicatedByte(byte)); |
| 115 | } |
| 116 | |
| 117 | probe_index += 1; |
| 118 | } |
| 119 | |
| 120 | index += 1; |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | Ok(Self::from_str_unchecked(alphabet)) |
| 125 | } |
| 126 | |
| 127 | /// Create a `&str` from the symbols in the `Alphabet` |
| 128 | pub fn as_str(&self) -> &str { |
| 129 | core::str::from_utf8(&self.symbols).unwrap() |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | impl convert::TryFrom<&str> for Alphabet { |
| 134 | type Error = ParseAlphabetError; |
| 135 | |
| 136 | fn try_from(value: &str) -> Result<Self, Self::Error> { |
| 137 | Self::new(alphabet:value) |
| 138 | } |
| 139 | } |
| 140 | |
/// Possible errors when constructing an [`Alphabet`] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique; carries the byte that was repeated
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`); carries the offending byte
    UnprintableByte(u8),
    /// `=` cannot be used; carries the offending byte
    ReservedByte(u8),
}

impl fmt::Display for ParseAlphabetError {
    /// Writes a short description of the error. Variants that carry a byte
    /// render it as `0x`-prefixed, zero-padded two-digit hex.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
            Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
            Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
            Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
        }
    }
}

#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
| 167 | |
| 168 | /// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. |
| 169 | /// |
| 170 | /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 |
| 171 | pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( |
| 172 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" , |
| 173 | ); |
| 174 | |
| 175 | /// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. |
| 176 | /// |
| 177 | /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 |
| 178 | pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( |
| 179 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" , |
| 180 | ); |
| 181 | |
| 182 | /// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). |
| 183 | /// |
| 184 | /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. |
| 185 | pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( |
| 186 | alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" , |
| 187 | ); |
| 188 | |
| 189 | /// The bcrypt alphabet. |
| 190 | pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( |
| 191 | alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" , |
| 192 | ); |
| 193 | |
| 194 | /// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). |
| 195 | /// |
| 196 | /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) |
| 197 | pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( |
| 198 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+," , |
| 199 | ); |
| 200 | |
| 201 | /// The alphabet used in BinHex 4.0 files. |
| 202 | /// |
| 203 | /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) |
| 204 | pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( |
| 205 | alphabet:"! \"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr" , |
| 206 | ); |
| 207 | |
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    /// A repeated byte at the very start of the alphabet is reported.
    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    /// A repeated byte at the very end of the alphabet is reported.
    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    /// A repeated byte in the middle of the alphabet is reported.
    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    /// The length check runs first: an over-long input is rejected as `InvalidLength`
    /// even though it also contains a duplicate.
    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    /// The padding byte `=` may not be used as a symbol.
    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    /// A control character is rejected. The input must stay exactly 64 bytes long,
    /// otherwise the length check would fire before the printable check.
    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    /// The checked constructor yields the same value as the unchecked constant.
    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    /// `as_str` round-trips the string the alphabet was built from.
    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str())
    }
}
| 286 | |