1//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
3use crate::PAD_BYTE;
4use core::{convert, fmt};
5#[cfg(any(feature = "std", test))]
6use std::error;
7
/// Number of symbols in an alphabet.
const ALPHABET_SIZE: usize = 64;

/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::{
///     alphabet::Alphabet,
///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
/// };
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The alphabet's symbols, stored in the order they were supplied.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
58
59impl Alphabet {
60 /// Performs no checks so that it can be const.
61 /// Used only for known-valid strings.
62 const fn from_str_unchecked(alphabet: &str) -> Self {
63 let mut symbols = [0_u8; ALPHABET_SIZE];
64 let source_bytes = alphabet.as_bytes();
65
66 // a way to copy that's allowed in const fn
67 let mut index = 0;
68 while index < ALPHABET_SIZE {
69 symbols[index] = source_bytes[index];
70 index += 1;
71 }
72
73 Self { symbols }
74 }
75
76 /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
77 ///
78 /// The `=` byte is not allowed as it is used for padding.
79 pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
80 let bytes = alphabet.as_bytes();
81 if bytes.len() != ALPHABET_SIZE {
82 return Err(ParseAlphabetError::InvalidLength);
83 }
84
85 {
86 let mut index = 0;
87 while index < ALPHABET_SIZE {
88 let byte = bytes[index];
89
90 // must be ascii printable. 127 (DEL) is commonly considered printable
91 // for some reason but clearly unsuitable for base64.
92 if !(byte >= 32_u8 && byte <= 126_u8) {
93 return Err(ParseAlphabetError::UnprintableByte(byte));
94 }
95 // = is assumed to be padding, so cannot be used as a symbol
96 if byte == PAD_BYTE {
97 return Err(ParseAlphabetError::ReservedByte(byte));
98 }
99
100 // Check for duplicates while staying within what const allows.
101 // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
102 // microsecond range.
103
104 let mut probe_index = 0;
105 while probe_index < ALPHABET_SIZE {
106 if probe_index == index {
107 probe_index += 1;
108 continue;
109 }
110
111 let probe_byte = bytes[probe_index];
112
113 if byte == probe_byte {
114 return Err(ParseAlphabetError::DuplicatedByte(byte));
115 }
116
117 probe_index += 1;
118 }
119
120 index += 1;
121 }
122 }
123
124 Ok(Self::from_str_unchecked(alphabet))
125 }
126
127 /// Create a `&str` from the symbols in the `Alphabet`
128 pub fn as_str(&self) -> &str {
129 core::str::from_utf8(&self.symbols).unwrap()
130 }
131}
132
133impl convert::TryFrom<&str> for Alphabet {
134 type Error = ParseAlphabetError;
135
136 fn try_from(value: &str) -> Result<Self, Self::Error> {
137 Self::new(alphabet:value)
138 }
139}
140
/// Possible errors when constructing an [Alphabet] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique; carries the byte that was found more than once
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`); carries the offending byte
    UnprintableByte(u8),
    /// `=` cannot be used; carries the offending byte
    ReservedByte(u8),
}

impl fmt::Display for ParseAlphabetError {
    /// Writes a short human-readable message; offending bytes are shown as
    /// zero-padded, `0x`-prefixed hex (e.g. `0x3d`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
            Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
            Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
            Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
        }
    }
}

#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
167
168/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
169///
170/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
171pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
172 alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
173);
174
175/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
176///
177/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
178pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
179 alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
180);
181
182/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
183///
184/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
185pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
186 alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
187);
188
189/// The bcrypt alphabet.
190pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
191 alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
192);
193
194/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
195///
196/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
197pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
198 alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
199);
200
201/// The alphabet used in BinHex 4.0 files.
202///
203/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
204pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
205 alphabet:"!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
206);
207
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    /// Runs `Alphabet::new` on `input` and returns the error it produced,
    /// panicking if construction unexpectedly succeeded.
    fn rejection(input: &str) -> ParseAlphabetError {
        Alphabet::new(input).unwrap_err()
    }

    // A repeated symbol must be reported wherever it sits in the string.

    #[test]
    fn detects_duplicate_start() {
        let err = rejection("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::DuplicatedByte(b'A'), err);
    }

    #[test]
    fn detects_duplicate_end() {
        let err = rejection("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//");
        assert_eq!(ParseAlphabetError::DuplicatedByte(b'/'), err);
    }

    #[test]
    fn detects_duplicate_middle() {
        let err = rejection("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::DuplicatedByte(b'Z'), err);
    }

    #[test]
    fn detects_length() {
        // The input is too long and also contains duplicates; the length error is
        // the one that must be reported.
        let err = rejection(
            "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
        );
        assert_eq!(ParseAlphabetError::InvalidLength, err);
    }

    #[test]
    fn detects_padding() {
        let err = rejection("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=");
        assert_eq!(ParseAlphabetError::ReservedByte(b'='), err);
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        let err =
            rejection("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::UnprintableByte(0xc), err);
    }

    #[test]
    fn same_as_unchecked() {
        // The checked constructor must produce exactly what the unchecked constant holds.
        let checked =
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap();
        assert_eq!(STANDARD, checked);
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let parsed = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, parsed.as_str());
    }
}
286