//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
use crate::PAD_BYTE;
use core::fmt;
#[cfg(any(feature = "std", test))]
use std::error;
7
/// Number of symbols (bytes) in a base64 alphabet.
const ALPHABET_SIZE: usize = 64;
9
10/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11///
12/// Common alphabets are provided as constants, and custom alphabets
13/// can be made via `from_str` or the `TryFrom<str>` implementation.
14///
15/// ```
16/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
17///
18/// let engine = base64::engine::GeneralPurpose::new(
19/// &custom,
20/// base64::engine::general_purpose::PAD);
21/// ```
22#[derive(Clone, Debug, Eq, PartialEq)]
23pub struct Alphabet {
24 pub(crate) symbols: [u8; ALPHABET_SIZE],
25}
26
27impl Alphabet {
28 /// Performs no checks so that it can be const.
29 /// Used only for known-valid strings.
30 const fn from_str_unchecked(alphabet: &str) -> Self {
31 let mut symbols = [0_u8; ALPHABET_SIZE];
32 let source_bytes = alphabet.as_bytes();
33
34 // a way to copy that's allowed in const fn
35 let mut index = 0;
36 while index < ALPHABET_SIZE {
37 symbols[index] = source_bytes[index];
38 index += 1;
39 }
40
41 Self { symbols }
42 }
43
44 /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
45 ///
46 /// The `=` byte is not allowed as it is used for padding.
47 pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
48 let bytes = alphabet.as_bytes();
49 if bytes.len() != ALPHABET_SIZE {
50 return Err(ParseAlphabetError::InvalidLength);
51 }
52
53 {
54 let mut index = 0;
55 while index < ALPHABET_SIZE {
56 let byte = bytes[index];
57
58 // must be ascii printable. 127 (DEL) is commonly considered printable
59 // for some reason but clearly unsuitable for base64.
60 if !(byte >= 32_u8 && byte <= 126_u8) {
61 return Err(ParseAlphabetError::UnprintableByte(byte));
62 }
63 // = is assumed to be padding, so cannot be used as a symbol
64 if byte == PAD_BYTE {
65 return Err(ParseAlphabetError::ReservedByte(byte));
66 }
67
68 // Check for duplicates while staying within what const allows.
69 // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
70 // microsecond range.
71
72 let mut probe_index = 0;
73 while probe_index < ALPHABET_SIZE {
74 if probe_index == index {
75 probe_index += 1;
76 continue;
77 }
78
79 let probe_byte = bytes[probe_index];
80
81 if byte == probe_byte {
82 return Err(ParseAlphabetError::DuplicatedByte(byte));
83 }
84
85 probe_index += 1;
86 }
87
88 index += 1;
89 }
90 }
91
92 Ok(Self::from_str_unchecked(alphabet))
93 }
94}
95
96impl TryFrom<&str> for Alphabet {
97 type Error = ParseAlphabetError;
98
99 fn try_from(value: &str) -> Result<Self, Self::Error> {
100 Self::new(alphabet:value)
101 }
102}
103
/// Possible errors when constructing an [Alphabet] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique. Carries the byte that was seen more than once.
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`). Carries the offending byte.
    UnprintableByte(u8),
    /// `=` cannot be used. Carries the reserved byte that was found.
    ReservedByte(u8),
}
116
117impl fmt::Display for ParseAlphabetError {
118 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119 match self {
120 Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
121 Self::DuplicatedByte(b: &u8) => write!(f, "Duplicated byte: {:#04x}", b),
122 Self::UnprintableByte(b: &u8) => write!(f, "Unprintable byte: {:#04x}", b),
123 Self::ReservedByte(b: &u8) => write!(f, "Reserved byte: {:#04x}", b),
124 }
125 }
126}
127
// Only compiled when the `std` feature is enabled or under test, since the
// `error` import it relies on is gated the same way.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
130
131/// The standard alphabet (uses `+` and `/`).
132///
133/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
134pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
135 alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
136);
137
138/// The URL safe alphabet (uses `-` and `_`).
139///
140/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
141pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
142 alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
143);
144
145/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
146///
147/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
148pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
149 alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
150);
151
152/// The bcrypt alphabet.
153pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
154 alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
155);
156
157/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
158///
159/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
160pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
161 alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
162);
163
164/// The alphabet used in BinHex 4.0 files.
165///
166/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
167pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
168 alphabet:"!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
169);
170
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use std::convert::TryFrom as _;

    // A repeated byte at the very start of the alphabet is reported.
    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // A repeated byte in the last two positions is reported.
    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    // A repeated byte in the interior of the alphabet is reported.
    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // The length check runs first, so an over-long input fails with `InvalidLength`
    // even though it also contains a duplicate.
    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    // The padding byte `=` is rejected as a symbol.
    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // The checked `TryFrom` path yields the same value as the unchecked const constructor.
    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }
}
242