// alphabet.rs source code [crates/base64-0.21.2/src/alphabet.rs]

//! Provides [`Alphabet`] and constants for alphabets commonly used in the wild.
2
3	use crate::PAD_BYTE;
4	use core::fmt;
5	#[cfg(any(feature = "std", test))]
6	use std::error;
7
/// Number of symbols an alphabet must contain.
const ALPHABET_SIZE: usize = 64;
9
10	/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11	///
12	/// Common alphabets are provided as constants, and custom alphabets
13	/// can be made via `from_str` or the `TryFrom<str>` implementation.
14	///
15	/// ```
16	/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
17	///
18	/// let engine = base64::engine::GeneralPurpose::new(
19	/// &custom,
20	/// base64::engine::general_purpose::PAD);
21	/// ```
22	#[derive(Clone, Debug, Eq, PartialEq)]
23	pub struct Alphabet {
24	pub(crate) symbols: [u8; ALPHABET_SIZE],
25	}
26
27	impl Alphabet {
28	/// Performs no checks so that it can be const.
29	/// Used only for known-valid strings.
30	const fn from_str_unchecked(alphabet: &str) -> Self {
31	let mut symbols = [`0_u8`; ALPHABET_SIZE];
32	let source_bytes = alphabet.as_bytes();
33
34	// a way to copy that's allowed in const fn
35	let mut index = `0`;
36	while index < ALPHABET_SIZE {
37	symbols[index] = source_bytes[index];
38	index += `1`;
39	}
40
41	Self { symbols }
42	}
43
44	/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
45	///
46	/// The `=` byte is not allowed as it is used for padding.
47	pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
48	let bytes = alphabet.as_bytes();
49	if bytes.len() != ALPHABET_SIZE {
50	return Err(ParseAlphabetError::InvalidLength);
51	}
52
53	{
54	let mut index = `0`;
55	while index < ALPHABET_SIZE {
56	let byte = bytes[index];
57
58	// must be ascii printable. 127 (DEL) is commonly considered printable
59	// for some reason but clearly unsuitable for base64.
60	if !(byte >= `32_u8` && byte <= `126_u8`) {
61	return Err(ParseAlphabetError::UnprintableByte(byte));
62	}
63	// = is assumed to be padding, so cannot be used as a symbol
64	if byte == PAD_BYTE {
65	return Err(ParseAlphabetError::ReservedByte(byte));
66	}
67
68	// Check for duplicates while staying within what const allows.
69	// It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
70	// microsecond range.
71
72	let mut probe_index = `0`;
73	while probe_index < ALPHABET_SIZE {
74	if probe_index == index {
75	probe_index += `1`;
76	continue;
77	}
78
79	let probe_byte = bytes[probe_index];
80
81	if byte == probe_byte {
82	return Err(ParseAlphabetError::DuplicatedByte(byte));
83	}
84
85	probe_index += `1`;
86	}
87
88	index += `1`;
89	}
90	}
91
92	Ok(Self::from_str_unchecked(alphabet))
93	}
94	}
95
96	impl TryFrom<&str> for Alphabet {
97	type Error = ParseAlphabetError;
98
99	fn try_from(value: &str) -> Result<Self, Self::Error> {
100	Self::new(alphabet:value)
101	}
102	}
103
/// Possible errors when constructing an [Alphabet] from a `str`.
///
/// Variants with a `u8` payload carry the byte that caused the error.
#[derive(Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be 64 ASCII bytes
    InvalidLength,
    /// All bytes must be unique
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`).
    UnprintableByte(u8),
    /// `=` cannot be used
    ReservedByte(u8),
}
116
117	impl fmt::Display for ParseAlphabetError {
118	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119	match self {
120	Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
121	Self::DuplicatedByte(b: &u8) => write!(f, "Duplicated byte: {:#`04`x}", b),
122	Self::UnprintableByte(b: &u8) => write!(f, "Unprintable byte: {:#`04`x}", b),
123	Self::ReservedByte(b: &u8) => write!(f, "Reserved byte: {:#`04`x}", b),
124	}
125	}
126	}
127
// `std::error` is only imported under this same cfg, so the impl is gated identically.
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
130
131	/// The standard alphabet (uses `+` and `/`).
132	///
133	/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
134	pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
135	alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
136	);
137
138	/// The URL safe alphabet (uses `-` and `_`).
139	///
140	/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
141	pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
142	alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
143	);
144
145	/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
146	///
147	/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
148	pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
149	alphabet:"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
150	);
151
152	/// The bcrypt alphabet.
153	pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
154	alphabet:"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
155	);
156
157	/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
158	///
159	/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
160	pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
161	alphabet:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
162	);
163
164	/// The alphabet used in BinHex 4.0 files.
165	///
166	/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
167	pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
168	alphabet:"!`\"`#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
169	);
170
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use std::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_delete_byte() {
        // DEL (127) sits just past the accepted range `[32, 126]`
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }
}
242