1 | //! Provides [Alphabet] and constants for alphabets commonly used in the wild. |
2 | |
3 | use crate::PAD_BYTE; |
4 | use core::fmt; |
5 | #[cfg (any(feature = "std" , test))] |
6 | use std::error; |
7 | |
/// Number of symbols in a base64 alphabet; also the length of `Alphabet`'s symbol table.
const ALPHABET_SIZE: usize = 64;
9 | |
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    // The symbol bytes, stored in the same order as in the string the alphabet was built from.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
26 | |
27 | impl Alphabet { |
28 | /// Performs no checks so that it can be const. |
29 | /// Used only for known-valid strings. |
30 | const fn from_str_unchecked(alphabet: &str) -> Self { |
31 | let mut symbols = [0_u8; ALPHABET_SIZE]; |
32 | let source_bytes = alphabet.as_bytes(); |
33 | |
34 | // a way to copy that's allowed in const fn |
35 | let mut index = 0; |
36 | while index < ALPHABET_SIZE { |
37 | symbols[index] = source_bytes[index]; |
38 | index += 1; |
39 | } |
40 | |
41 | Self { symbols } |
42 | } |
43 | |
44 | /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. |
45 | /// |
46 | /// The `=` byte is not allowed as it is used for padding. |
47 | pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { |
48 | let bytes = alphabet.as_bytes(); |
49 | if bytes.len() != ALPHABET_SIZE { |
50 | return Err(ParseAlphabetError::InvalidLength); |
51 | } |
52 | |
53 | { |
54 | let mut index = 0; |
55 | while index < ALPHABET_SIZE { |
56 | let byte = bytes[index]; |
57 | |
58 | // must be ascii printable. 127 (DEL) is commonly considered printable |
59 | // for some reason but clearly unsuitable for base64. |
60 | if !(byte >= 32_u8 && byte <= 126_u8) { |
61 | return Err(ParseAlphabetError::UnprintableByte(byte)); |
62 | } |
63 | // = is assumed to be padding, so cannot be used as a symbol |
64 | if byte == PAD_BYTE { |
65 | return Err(ParseAlphabetError::ReservedByte(byte)); |
66 | } |
67 | |
68 | // Check for duplicates while staying within what const allows. |
69 | // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit |
70 | // microsecond range. |
71 | |
72 | let mut probe_index = 0; |
73 | while probe_index < ALPHABET_SIZE { |
74 | if probe_index == index { |
75 | probe_index += 1; |
76 | continue; |
77 | } |
78 | |
79 | let probe_byte = bytes[probe_index]; |
80 | |
81 | if byte == probe_byte { |
82 | return Err(ParseAlphabetError::DuplicatedByte(byte)); |
83 | } |
84 | |
85 | probe_index += 1; |
86 | } |
87 | |
88 | index += 1; |
89 | } |
90 | } |
91 | |
92 | Ok(Self::from_str_unchecked(alphabet)) |
93 | } |
94 | } |
95 | |
96 | impl TryFrom<&str> for Alphabet { |
97 | type Error = ParseAlphabetError; |
98 | |
99 | fn try_from(value: &str) -> Result<Self, Self::Error> { |
100 | Self::new(alphabet:value) |
101 | } |
102 | } |
103 | |
/// Possible errors when constructing an [Alphabet] from a `str`.
///
/// Variants that carry a `u8` hold the offending byte from the input string.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique; carries the byte that appeared more than once
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`); carries the rejected byte.
    UnprintableByte(u8),
    /// `=` cannot be used because it is reserved for padding; carries the rejected byte
    ReservedByte(u8),
}
116 | |
117 | impl fmt::Display for ParseAlphabetError { |
118 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
119 | match self { |
120 | Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes" ), |
121 | Self::DuplicatedByte(b: &u8) => write!(f, "Duplicated byte: {:#04x}" , b), |
122 | Self::UnprintableByte(b: &u8) => write!(f, "Unprintable byte: {:#04x}" , b), |
123 | Self::ReservedByte(b: &u8) => write!(f, "Reserved byte: {:#04x}" , b), |
124 | } |
125 | } |
126 | } |
127 | |
// Only compiled with the `std` feature or in test builds, matching the conditional
// `use std::error;` at the top of the file. The trait's default methods are sufficient.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
130 | |
131 | /// The standard alphabet (uses `+` and `/`). |
132 | /// |
133 | /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). |
134 | pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( |
135 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" , |
136 | ); |
137 | |
138 | /// The URL safe alphabet (uses `-` and `_`). |
139 | /// |
140 | /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). |
141 | pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( |
142 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" , |
143 | ); |
144 | |
145 | /// The `crypt(3)` alphabet (uses `.` and `/` as the first two values). |
146 | /// |
147 | /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. |
148 | pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( |
149 | alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" , |
150 | ); |
151 | |
152 | /// The bcrypt alphabet. |
153 | pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( |
154 | alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" , |
155 | ); |
156 | |
157 | /// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`). |
158 | /// |
159 | /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) |
160 | pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( |
161 | alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+," , |
162 | ); |
163 | |
164 | /// The alphabet used in BinHex 4.0 files. |
165 | /// |
166 | /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) |
167 | pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( |
168 | alphabet:"! \"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr" , |
169 | ); |
170 | |
#[cfg(test)]
mod tests {
    //! Unit tests for [`Alphabet::new`] validation and its agreement with the
    //! unchecked constructor used for the built-in constants.

    use crate::alphabet::*;
    use std::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed; the literal must be exactly 64 bytes so the length check passes
        // and the printable-range check is the one that fires
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable_del() {
        // DEL (127) sits just past the accepted upper bound of 126
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }
}
242 | |