alphabet.rs source code [crates/base64/src/alphabet.rs]

1	//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
3	use crate::PAD_BYTE;
4	use core::{convert, fmt};
5	#[cfg(any(feature = "std", test))]
6	use std::error;
7
/// Number of symbols in a base64 alphabet.
const ALPHABET_SIZE: usize = 64;
9
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// Building and using a custom Alphabet:
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
///
/// Building a const:
///
/// ```
/// use base64::alphabet::Alphabet;
///
/// static CUSTOM: Alphabet = {
///     // Result::unwrap() isn't const yet, but panic!() is OK
///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
///         Ok(x) => x,
///         Err(_) => panic!("creation of alphabet failed"),
///     }
/// };
/// ```
///
/// Building lazily:
///
/// ```
/// use base64::{
///     alphabet::Alphabet,
///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
/// };
/// use once_cell::sync::Lazy;
///
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
/// );
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The `ALPHABET_SIZE` symbol bytes, in the order they appeared in the source string.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
58
59	impl Alphabet {
60	/// Performs no checks so that it can be const.
61	/// Used only for known-valid strings.
62	const fn from_str_unchecked(alphabet: &str) -> Self {
63	let mut symbols = [`0_u8`; ALPHABET_SIZE];
64	let source_bytes = alphabet.as_bytes();
65
66	// a way to copy that's allowed in const fn
67	let mut index = `0`;
68	while index < ALPHABET_SIZE {
69	symbols[index] = source_bytes[index];
70	index += `1`;
71	}
72
73	Self { symbols }
74	}
75
76	/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
77	///
78	/// The `=` byte is not allowed as it is used for padding.
79	pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
80	let bytes = alphabet.as_bytes();
81	if bytes.len() != ALPHABET_SIZE {
82	return Err(ParseAlphabetError::InvalidLength);
83	}
84
85	{
86	let mut index = `0`;
87	while index < ALPHABET_SIZE {
88	let byte = bytes[index];
89
90	// must be ascii printable. 127 (DEL) is commonly considered printable
91	// for some reason but clearly unsuitable for base64.
92	if !(byte >= `32_u8` && byte <= `126_u8`) {
93	return Err(ParseAlphabetError::UnprintableByte(byte));
94	}
95	// = is assumed to be padding, so cannot be used as a symbol
96	if byte == PAD_BYTE {
97	return Err(ParseAlphabetError::ReservedByte(byte));
98	}
99
100	// Check for duplicates while staying within what const allows.
101	// It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
102	// microsecond range.
103
104	let mut probe_index = `0`;
105	while probe_index < ALPHABET_SIZE {
106	if probe_index == index {
107	probe_index += `1`;
108	continue;
109	}
110
111	let probe_byte = bytes[probe_index];
112
113	if byte == probe_byte {
114	return Err(ParseAlphabetError::DuplicatedByte(byte));
115	}
116
117	probe_index += `1`;
118	}
119
120	index += `1`;
121	}
122	}
123
124	Ok(Self::from_str_unchecked(alphabet))
125	}
126
127	/// Create a `&str` from the symbols in the `Alphabet`
128	pub fn as_str(&self) -> &str {
129	core::str::from_utf8(&self.symbols).unwrap()
130	}
131	}
132
133	impl convert::TryFrom<&str> for Alphabet {
134	type Error = ParseAlphabetError;
135
136	fn try_from(value: &str) -> Result<Self, Self::Error> {
137	Self::new(alphabet:value)
138	}
139	}
140
/// Possible errors when constructing an [Alphabet] from a `str`.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique. Carries the byte that was found more than once.
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`). Carries the offending byte.
    UnprintableByte(u8),
    /// `=` cannot be used. Carries the rejected byte.
    ReservedByte(u8),
}
153
154	impl fmt::Display for ParseAlphabetError {
155	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156	match self {
157	Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
158	Self::DuplicatedByte(b: &u8) => write!(f, "Duplicated byte: {:#04x}", b),
159	Self::UnprintableByte(b: &u8) => write!(f, "Unprintable byte: {:#04x}", b),
160	Self::ReservedByte(b: &u8) => write!(f, "Reserved byte: {:#04x}", b),
161	}
162	}
163	}
164
// `std::error::Error` is only implemented when the `std` feature is enabled or
// when building tests, matching the conditional `use std::error` import.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
167
168	/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
169	///
170	/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
171	pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
172	alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
173	);
174
175	/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
176	///
177	/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
178	pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
179	alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
180	);
181
182	/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
183	///
184	/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
185	pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
186	alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
187	);
188
189	/// The bcrypt alphabet.
190	pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
191	alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
192	);
193
194	/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
195	///
196	/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
197	pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
198	alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
199	);
200
201	/// The alphabet used in BinHex 4.0 files.
202	///
203	/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
204	pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
205	alphabet:"!`\"`#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
206	);
207
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    // A duplicate in the first two positions is reported.
    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // A duplicate in the last two positions is reported.
    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    // A duplicate in the middle of the string is reported.
    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // Strings that are not exactly 64 bytes are rejected before any other check.
    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    // The padding byte `=` cannot be used as a symbol.
    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    // Bytes outside the printable ASCII range are rejected.
    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    // The checked constructor yields the same value as the unchecked constant.
    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    // `as_str` round-trips the string the alphabet was built from.
    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str())
    }
}
286