lexer.rs source code [crates/time-0.3.22/src/format_description/parse/lexer.rs]

1	//! Lexer for parsing format descriptions.
2
3	use core::iter;
4
5	use super::{unused, Error, Location, Spanned, SpannedValue};
6
7	/// An iterator over the lexed tokens.
8	pub(super) struct Lexed<I: Iterator> {
9	/// The internal iterator.
10	iter: core::iter::Peekable<I>,
11	}
12
13	impl<I: Iterator> Iterator for Lexed<I> {
14	type Item = I::Item;
15
16	fn next(&mut self) -> Option<Self::Item> {
17	self.iter.next()
18	}
19	}
20
21	impl<'iter, 'token: 'iter, I: Iterator<Item = Result<Token<'token>, Error>> + 'iter> Lexed<I> {
22	/// Peek at the next item in the iterator.
23	pub(super) fn peek(&mut self) -> Option<&I::Item> {
24	self.iter.peek()
25	}
26
27	/// Consume the next token if it is whitespace.
28	pub(super) fn next_if_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
29	if let Some(&Ok(Token::ComponentPart {
30	kind: ComponentKind::Whitespace,
31	value,
32	})) = self.peek()
33	{
34	self.next(); // consume
35	Some(value)
36	} else {
37	None
38	}
39	}
40
41	/// Consume the next token if it is a component item that is not whitespace.
42	pub(super) fn next_if_not_whitespace(&mut self) -> Option<Spanned<&'token [u8]>> {
43	if let Some(&Ok(Token::ComponentPart {
44	kind: ComponentKind::NotWhitespace,
45	value,
46	})) = self.peek()
47	{
48	self.next(); // consume
49	Some(value)
50	} else {
51	None
52	}
53	}
54
55	/// Consume the next token if it is an opening bracket.
56	pub(super) fn next_if_opening_bracket(&mut self) -> Option<Location> {
57	if let Some(&Ok(Token::Bracket {
58	kind: BracketKind::Opening,
59	location,
60	})) = self.peek()
61	{
62	self.next(); // consume
63	Some(location)
64	} else {
65	None
66	}
67	}
68
69	/// Peek at the next token if it is a closing bracket.
70	pub(super) fn peek_closing_bracket(&'iter mut self) -> Option<&'iter Location> {
71	if let Some(Ok(Token::Bracket {
72	kind: BracketKind::Closing,
73	location,
74	})) = self.peek()
75	{
76	Some(location)
77	} else {
78	None
79	}
80	}
81
82	/// Consume the next token if it is a closing bracket.
83	pub(super) fn next_if_closing_bracket(&mut self) -> Option<Location> {
84	if let Some(&Ok(Token::Bracket {
85	kind: BracketKind::Closing,
86	location,
87	})) = self.peek()
88	{
89	self.next(); // consume
90	Some(location)
91	} else {
92	None
93	}
94	}
95	}
96
97	/// A token emitted by the lexer. There is no semantic meaning at this stage.
98	pub(super) enum Token<'a> {
99	/// A literal string, formatted and parsed as-is.
100	Literal(Spanned<&'a [u8]>),
101	/// An opening or closing bracket. May or may not be the start or end of a component.
102	Bracket {
103	/// Whether the bracket is opening or closing.
104	kind: BracketKind,
105	/// Where the bracket was in the format string.
106	location: Location,
107	},
108	/// One part of a component. This could be its name, a modifier, or whitespace.
109	ComponentPart {
110	/// Whether the part is whitespace or not.
111	kind: ComponentKind,
112	/// The part itself.
113	value: Spanned<&'a [u8]>,
114	},
115	}
116
117	/// What type of bracket is present.
118	pub(super) enum BracketKind {
119	/// An opening bracket: `[`
120	Opening,
121	/// A closing bracket: `]`
122	Closing,
123	}
124
125	/// Indicates whether the component is whitespace or not.
126	pub(super) enum ComponentKind {
127	#[allow(clippy::missing_docs_in_private_items)]
128	Whitespace,
129	#[allow(clippy::missing_docs_in_private_items)]
130	NotWhitespace,
131	}
132
133	/// Attach [`Location`] information to each byte in the iterator.
134	fn attach_location<'item>(
135	iter: impl Iterator<Item = &'item u8>,
136	) -> impl Iterator<Item = (&'item u8, Location)> {
137	let mut byte_pos: u32 = `0`;
138
139	iter.map(move \|byte: &u8\| {
140	let location: Location = Location { byte: byte_pos };
141	byte_pos += `1`;
142	(byte, location)
143	})
144	}
145
146	/// Parse the string into a series of [`Token`]s.
147	///
148	/// `VERSION` controls the version of the format description that is being parsed. Currently, this
149	/// must be 1 or 2.
150	///
151	/// - When `VERSION` is 1, `[[` is the only escape sequence, resulting in a literal `[`.
152	/// - When `VERSION` is 2, all escape sequences begin with `\`. The only characters that may
153	/// currently follow are `\`, `[`, and `]`, all of which result in the literal character. All
154	/// other characters result in a lex error.
155	pub(super) fn lex<const VERSION: usize>(
156	mut input: &[u8],
157	) -> Lexed<impl Iterator<Item = Result<Token<'_>, Error>>> {
158	validate_version!(VERSION);
159
160	let mut depth: u8 = `0`;
161	let mut iter = attach_location(input.iter()).peekable();
162	let mut second_bracket_location = None;
163
164	let iter = iter::from_fn(move \|\| {
165	// The flag is only set when version is zero.
166	if version!(..=`1`) {
167	// There is a flag set to emit the second half of an escaped bracket pair.
168	if let Some(location) = second_bracket_location.take() {
169	return Some(Ok(Token::Bracket {
170	kind: BracketKind::Opening,
171	location,
172	}));
173	}
174	}
175
176	Some(Ok(match iter.next()? {
177	// possible escape sequence
178	(b'`\\`', backslash_loc) if version!(`2`..) => {
179	match iter.next() {
180	Some((b'`\\`' \| b'[' \| b']', char_loc)) => {
181	// The escaped character is emitted as-is.
182	let char = &input[`1`..`2`];
183	input = &input[`2`..];
184	if depth == `0` {
185	Token::Literal(char.spanned(backslash_loc.to(char_loc)))
186	} else {
187	Token::ComponentPart {
188	kind: ComponentKind::NotWhitespace,
189	value: char.spanned(backslash_loc.to(char_loc)),
190	}
191	}
192	}
193	Some((_, loc)) => {
194	return Some(Err(Error {
195	_inner: unused(loc.error("invalid escape sequence")),
196	public: crate::error::InvalidFormatDescription::Expected {
197	what: "valid escape sequence",
198	index: loc.byte as _,
199	},
200	}));
201	}
202	None => {
203	return Some(Err(Error {
204	_inner: unused(backslash_loc.error("unexpected end of input")),
205	public: crate::error::InvalidFormatDescription::Expected {
206	what: "valid escape sequence",
207	index: backslash_loc.byte as _,
208	},
209	}));
210	}
211	}
212	}
213	// potentially escaped opening bracket
214	(b'[', location) if version!(..=`1`) => {
215	if let Some((_, second_location)) = iter.next_if(\|&(&byte, _)\| byte == b'[') {
216	// Escaped bracket. Store the location of the second so we can emit it later.
217	second_bracket_location = Some(second_location);
218	input = &input[`2`..];
219	} else {
220	// opening bracket
221	depth += `1`;
222	input = &input[`1`..];
223	}
224
225	Token::Bracket {
226	kind: BracketKind::Opening,
227	location,
228	}
229	}
230	// opening bracket
231	(b'[', location) => {
232	depth += `1`;
233	input = &input[`1`..];
234
235	Token::Bracket {
236	kind: BracketKind::Opening,
237	location,
238	}
239	}
240	// closing bracket
241	(b']', location) if depth > `0` => {
242	depth -= `1`;
243	input = &input[`1`..];
244
245	Token::Bracket {
246	kind: BracketKind::Closing,
247	location,
248	}
249	}
250	// literal
251	(_, start_location) if depth == `0` => {
252	let mut bytes = `1`;
253	let mut end_location = start_location;
254
255	while let Some((_, location)) =
256	iter.next_if(\|&(&byte, _)\| !((version!(`2`..) && byte == b'`\\`') \|\| byte == b'['))
257	{
258	end_location = location;
259	bytes += `1`;
260	}
261
262	let value = &input[..bytes];
263	input = &input[bytes..];
264
265	Token::Literal(value.spanned(start_location.to(end_location)))
266	}
267	// component part
268	(byte, start_location) => {
269	let mut bytes = `1`;
270	let mut end_location = start_location;
271	let is_whitespace = byte.is_ascii_whitespace();
272
273	while let Some((_, location)) = iter.next_if(\|&(byte, _)\| {
274	!matches!(byte, b'`\\`' \| b'[' \| b']')
275	&& is_whitespace == byte.is_ascii_whitespace()
276	}) {
277	end_location = location;
278	bytes += `1`;
279	}
280
281	let value = &input[..bytes];
282	input = &input[bytes..];
283
284	Token::ComponentPart {
285	kind: if is_whitespace {
286	ComponentKind::Whitespace
287	} else {
288	ComponentKind::NotWhitespace
289	},
290	value: value.spanned(start_location.to(end_location)),
291	}
292	}
293	}))
294	});
295
296	Lexed {
297	iter: iter.peekable(),
298	}
299	}
300