1 | //! Provides [Alphabet] and constants for alphabets commonly used in the wild. |
2 | |
3 | use crate::PAD_BYTE; |
4 | use core::{convert, fmt}; |
5 | #[cfg (any(feature = "std" , test))] |
6 | use std::error; |
7 | |
/// Number of symbols in an alphabet, and therefore the length of [`Alphabet`]'s symbol table.
const ALPHABET_SIZE: usize = 64;

/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::{
///     alphabet::Alphabet,
///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
/// };
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The alphabet's symbol bytes, stored in the order they were supplied at construction.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
58 | |
59 | impl Alphabet { |
60 | /// Performs no checks so that it can be const. |
61 | /// Used only for known-valid strings. |
62 | const fn from_str_unchecked(alphabet: &str) -> Self { |
63 | let mut symbols = [0_u8; ALPHABET_SIZE]; |
64 | let source_bytes = alphabet.as_bytes(); |
65 | |
66 | // a way to copy that's allowed in const fn |
67 | let mut index = 0; |
68 | while index < ALPHABET_SIZE { |
69 | symbols[index] = source_bytes[index]; |
70 | index += 1; |
71 | } |
72 | |
73 | Self { symbols } |
74 | } |
75 | |
76 | /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. |
77 | /// |
78 | /// The `=` byte is not allowed as it is used for padding. |
79 | pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { |
80 | let bytes = alphabet.as_bytes(); |
81 | if bytes.len() != ALPHABET_SIZE { |
82 | return Err(ParseAlphabetError::InvalidLength); |
83 | } |
84 | |
85 | { |
86 | let mut index = 0; |
87 | while index < ALPHABET_SIZE { |
88 | let byte = bytes[index]; |
89 | |
90 | // must be ascii printable. 127 (DEL) is commonly considered printable |
91 | // for some reason but clearly unsuitable for base64. |
92 | if !(byte >= 32_u8 && byte <= 126_u8) { |
93 | return Err(ParseAlphabetError::UnprintableByte(byte)); |
94 | } |
95 | // = is assumed to be padding, so cannot be used as a symbol |
96 | if byte == PAD_BYTE { |
97 | return Err(ParseAlphabetError::ReservedByte(byte)); |
98 | } |
99 | |
100 | // Check for duplicates while staying within what const allows. |
101 | // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit |
102 | // microsecond range. |
103 | |
104 | let mut probe_index = 0; |
105 | while probe_index < ALPHABET_SIZE { |
106 | if probe_index == index { |
107 | probe_index += 1; |
108 | continue; |
109 | } |
110 | |
111 | let probe_byte = bytes[probe_index]; |
112 | |
113 | if byte == probe_byte { |
114 | return Err(ParseAlphabetError::DuplicatedByte(byte)); |
115 | } |
116 | |
117 | probe_index += 1; |
118 | } |
119 | |
120 | index += 1; |
121 | } |
122 | } |
123 | |
124 | Ok(Self::from_str_unchecked(alphabet)) |
125 | } |
126 | |
127 | /// Create a `&str` from the symbols in the `Alphabet` |
128 | pub fn as_str(&self) -> &str { |
129 | core::str::from_utf8(&self.symbols).unwrap() |
130 | } |
131 | } |
132 | |
133 | impl convert::TryFrom<&str> for Alphabet { |
134 | type Error = ParseAlphabetError; |
135 | |
136 | fn try_from(value: &str) -> Result<Self, Self::Error> { |
137 | Self::new(alphabet:value) |
138 | } |
139 | } |
140 | |
/// Possible errors when constructing an [Alphabet] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be exactly 64 ASCII bytes long.
    InvalidLength,
    /// All bytes must be unique; carries the byte that occurred more than once.
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`); carries the offending byte.
    UnprintableByte(u8),
    /// `=` cannot be used; carries the rejected byte.
    ReservedByte(u8),
}

/// Human-readable messages; offending bytes are rendered as zero-padded hex (e.g. `0x0c`).
impl fmt::Display for ParseAlphabetError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
            Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
            Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
            Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
        }
    }
}

// The `Error` trait lives in `std`, so this impl exists only when `std` is available
// (or when building tests).
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
167 | |
168 | /// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. |
169 | /// |
170 | /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 |
171 | pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( |
172 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" , |
173 | ); |
174 | |
175 | /// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. |
176 | /// |
177 | /// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 |
178 | pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( |
179 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" , |
180 | ); |
181 | |
182 | /// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). |
183 | /// |
184 | /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. |
185 | pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( |
186 | alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" , |
187 | ); |
188 | |
189 | /// The bcrypt alphabet. |
190 | pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( |
191 | alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" , |
192 | ); |
193 | |
194 | /// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). |
195 | /// |
196 | /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) |
197 | pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( |
198 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+," , |
199 | ); |
200 | |
201 | /// The alphabet used in BinHex 4.0 files. |
202 | /// |
203 | /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) |
204 | pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( |
205 | alphabet:"! \"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr" , |
206 | ); |
207 | |
/// Unit tests for alphabet validation and round-tripping.
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    // A repeated symbol in the first two positions is reported.
    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // A repeated symbol in the last two positions is reported.
    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    // A repeated symbol in the interior of the string is reported.
    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // Input longer than 64 bytes is rejected before any per-byte check runs.
    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    // The padding byte `=` may not be used as a symbol.
    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    // The input must be exactly 64 bytes so that the length check passes and the
    // unprintable-byte check is the one that fires.
    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // The checked constructor yields the same value as the unchecked constant.
    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    // `as_str` returns exactly the string the alphabet was built from.
    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str())
    }
}
286 | |