ast.rs source code [crates/time-0.3.22/src/format_description/parse/ast.rs]

1	//! AST for parsing format descriptions.
2
3	use alloc::boxed::Box;
4	use alloc::string::String;
5	use alloc::vec::Vec;
6	use core::iter;
7
8	use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused};
9
10	/// One part of a complete format description.
11	pub(super) enum Item<'a> {
12	/// A literal string, formatted and parsed as-is.
13	///
14	/// This should never be present inside a nested format description.
15	Literal(Spanned<&'a [u8]>),
16	/// A sequence of brackets. The first acts as the escape character.
17	///
18	/// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`.
19	EscapedBracket {
20	/// The first bracket.
21	_first: Unused<Location>,
22	/// The second bracket.
23	_second: Unused<Location>,
24	},
25	/// Part of a type, along with its modifiers.
26	Component {
27	/// Where the opening bracket was in the format string.
28	_opening_bracket: Unused<Location>,
29	/// Whitespace between the opening bracket and name.
30	_leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
31	/// The name of the component.
32	name: Spanned<&'a [u8]>,
33	/// The modifiers for the component.
34	modifiers: Box<[Modifier<'a>]>,
35	/// Whitespace between the modifiers and closing bracket.
36	_trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
37	/// Where the closing bracket was in the format string.
38	_closing_bracket: Unused<Location>,
39	},
40	/// An optional sequence of items.
41	Optional {
42	/// Where the opening bracket was in the format string.
43	opening_bracket: Location,
44	/// Whitespace between the opening bracket and "optional".
45	_leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
46	/// The "optional" keyword.
47	_optional_kw: Unused<Spanned<&'a [u8]>>,
48	/// Whitespace between the "optional" keyword and the opening bracket.
49	_whitespace: Unused<Spanned<&'a [u8]>>,
50	/// The items within the optional sequence.
51	nested_format_description: NestedFormatDescription<'a>,
52	/// Where the closing bracket was in the format string.
53	closing_bracket: Location,
54	},
55	/// The first matching parse of a sequence of items.
56	First {
57	/// Where the opening bracket was in the format string.
58	opening_bracket: Location,
59	/// Whitespace between the opening bracket and "first".
60	_leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
61	/// The "first" keyword.
62	_first_kw: Unused<Spanned<&'a [u8]>>,
63	/// Whitespace between the "first" keyword and the opening bracket.
64	_whitespace: Unused<Spanned<&'a [u8]>>,
65	/// The sequences of items to try.
66	nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
67	/// Where the closing bracket was in the format string.
68	closing_bracket: Location,
69	},
70	}
71
72	/// A format description that is nested within another format description.
73	pub(super) struct NestedFormatDescription<'a> {
74	/// Where the opening bracket was in the format string.
75	pub(super) _opening_bracket: Unused<Location>,
76	/// The items within the nested format description.
77	pub(super) items: Box<[Item<'a>]>,
78	/// Where the closing bracket was in the format string.
79	pub(super) _closing_bracket: Unused<Location>,
80	/// Whitespace between the closing bracket and the next item.
81	pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
82	}
83
84	/// A modifier for a component.
85	pub(super) struct Modifier<'a> {
86	/// Whitespace preceding the modifier.
87	pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
88	/// The key of the modifier.
89	pub(super) key: Spanned<&'a [u8]>,
90	/// Where the colon of the modifier was in the format string.
91	pub(super) _colon: Unused<Location>,
92	/// The value of the modifier.
93	pub(super) value: Spanned<&'a [u8]>,
94	}
95
96	/// Parse the provided tokens into an AST.
97	pub(super) fn parse<
98	'item: 'iter,
99	'iter,
100	I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
101	const VERSION: usize,
102	>(
103	tokens: &'iter mut lexer::Lexed<I>,
104	) -> impl Iterator<Item = Result<Item<'item>, Error>> + 'iter {
105	validate_version!(VERSION);
106	parse_inner::<_, `false`, VERSION>(tokens)
107	}
108
109	/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
110	/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
111	fn parse_inner<
112	'item,
113	I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
114	const NESTED: bool,
115	const VERSION: usize,
116	>(
117	tokens: &mut lexer::Lexed<I>,
118	) -> impl Iterator<Item = Result<Item<'item>, Error>> + '_ {
119	validate_version!(VERSION);
120	iter::from_fn(move \|\| {
121	if NESTED && tokens.peek_closing_bracket().is_some() {
122	return None;
123	}
124
125	let next = match tokens.next()? {
126	Ok(token) => token,
127	Err(err) => return Some(Err(err)),
128	};
129
130	Some(match next {
131	lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => {
132	bug!("literal should not be present in nested description")
133	}
134	lexer::Token::Literal(value) => Ok(Item::Literal(value)),
135	lexer::Token::Bracket {
136	kind: lexer::BracketKind::Opening,
137	location,
138	} => {
139	if version!(..=`1`) {
140	if let Some(second_location) = tokens.next_if_opening_bracket() {
141	Ok(Item::EscapedBracket {
142	_first: unused(location),
143	_second: unused(second_location),
144	})
145	} else {
146	parse_component::<_, VERSION>(location, tokens)
147	}
148	} else {
149	parse_component::<_, VERSION>(location, tokens)
150	}
151	}
152	lexer::Token::Bracket {
153	kind: lexer::BracketKind::Closing,
154	location: _,
155	} if NESTED => {
156	bug!("closing bracket should be caught by the `if` statement")
157	}
158	lexer::Token::Bracket {
159	kind: lexer::BracketKind::Closing,
160	location: _,
161	} => {
162	bug!("closing bracket should have been consumed by `parse_component`")
163	}
164	lexer::Token::ComponentPart {
165	kind: _, // whitespace is significant in nested components
166	value,
167	} if NESTED => Ok(Item::Literal(value)),
168	lexer::Token::ComponentPart { kind: _, value: _ } => {
169	bug!("component part should have been consumed by `parse_component`")
170	}
171	})
172	})
173	}
174
175	/// Parse a component. This assumes that the opening bracket has already been consumed.
176	fn parse_component<
177	'a,
178	I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
179	const VERSION: usize,
180	>(
181	opening_bracket: Location,
182	tokens: &mut lexer::Lexed<I>,
183	) -> Result<Item<'a>, Error> {
184	validate_version!(VERSION);
185	let leading_whitespace = tokens.next_if_whitespace();
186
187	let Some(name) = tokens.next_if_not_whitespace() else {
188	let span = match leading_whitespace {
189	Some(Spanned { value: _, span }) => span,
190	None => opening_bracket.to(opening_bracket),
191	};
192	return Err(Error {
193	_inner: unused(span.error("expected component name")),
194	public: crate::error::InvalidFormatDescription::MissingComponentName {
195	index: span.start.byte as _,
196	},
197	});
198	};
199
200	if *name == b"optional" {
201	let Some(whitespace) = tokens.next_if_whitespace() else {
202	return Err(Error {
203	_inner: unused(name.span.error("expected whitespace after `optional`")),
204	public: crate::error::InvalidFormatDescription::Expected {
205	what: "whitespace after `optional`",
206	index: name.span.end.byte as _,
207	},
208	});
209	};
210
211	let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?;
212
213	let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
214	return Err(Error {
215	_inner: unused(opening_bracket.error("unclosed bracket")),
216	public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
217	index: opening_bracket.byte as _,
218	},
219	});
220	};
221
222	return Ok(Item::Optional {
223	opening_bracket,
224	_leading_whitespace: unused(leading_whitespace),
225	_optional_kw: unused(name),
226	_whitespace: unused(whitespace),
227	nested_format_description: nested,
228	closing_bracket,
229	});
230	}
231
232	if *name == b"first" {
233	let Some(whitespace) = tokens.next_if_whitespace() else {
234	return Err(Error {
235	_inner: unused(name.span.error("expected whitespace after `first`")),
236	public: crate::error::InvalidFormatDescription::Expected {
237	what: "whitespace after `first`",
238	index: name.span.end.byte as _,
239	},
240	});
241	};
242
243	let mut nested_format_descriptions = Vec::new();
244	while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) {
245	nested_format_descriptions.push(description);
246	}
247
248	let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
249	return Err(Error {
250	_inner: unused(opening_bracket.error("unclosed bracket")),
251	public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
252	index: opening_bracket.byte as _,
253	},
254	});
255	};
256
257	return Ok(Item::First {
258	opening_bracket,
259	_leading_whitespace: unused(leading_whitespace),
260	_first_kw: unused(name),
261	_whitespace: unused(whitespace),
262	nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
263	closing_bracket,
264	});
265	}
266
267	let mut modifiers = Vec::new();
268	let trailing_whitespace = loop {
269	let Some(whitespace) = tokens.next_if_whitespace() else { break None };
270
271	// This is not necessary for proper parsing, but provides a much better error when a nested
272	// description is used where it's not allowed.
273	if let Some(location) = tokens.next_if_opening_bracket() {
274	return Err(Error {
275	_inner: unused(
276	location
277	.to(location)
278	.error("modifier must be of the form `key:value`"),
279	),
280	public: crate::error::InvalidFormatDescription::InvalidModifier {
281	value: String::from("["),
282	index: location.byte as _,
283	},
284	});
285	}
286
287	let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else {
288	break Some(whitespace);
289	};
290
291	let Some(colon_index) = value.iter().position(\|&b\| b == b':') else {
292	return Err(Error {
293	_inner: unused(span.error("modifier must be of the form `key:value`")),
294	public: crate::error::InvalidFormatDescription::InvalidModifier {
295	value: String::from_utf8_lossy(value).into_owned(),
296	index: span.start.byte as _,
297	},
298	});
299	};
300	let key = &value[..colon_index];
301	let value = &value[colon_index + `1`..];
302
303	if key.is_empty() {
304	return Err(Error {
305	_inner: unused(span.shrink_to_start().error("expected modifier key")),
306	public: crate::error::InvalidFormatDescription::InvalidModifier {
307	value: String::new(),
308	index: span.start.byte as _,
309	},
310	});
311	}
312	if value.is_empty() {
313	return Err(Error {
314	_inner: unused(span.shrink_to_end().error("expected modifier value")),
315	public: crate::error::InvalidFormatDescription::InvalidModifier {
316	value: String::new(),
317	index: span.shrink_to_end().start.byte as _,
318	},
319	});
320	}
321
322	modifiers.push(Modifier {
323	_leading_whitespace: unused(whitespace),
324	key: key.spanned(span.shrink_to_before(colon_index as _)),
325	_colon: unused(span.start.offset(colon_index as _)),
326	value: value.spanned(span.shrink_to_after(colon_index as _)),
327	});
328	};
329
330	let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
331	return Err(Error {
332	_inner: unused(opening_bracket.error("unclosed bracket")),
333	public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
334	index: opening_bracket.byte as _,
335	},
336	});
337	};
338
339	Ok(Item::Component {
340	_opening_bracket: unused(opening_bracket),
341	_leading_whitespace: unused(leading_whitespace),
342	name,
343	modifiers: modifiers.into_boxed_slice(),
344	_trailing_whitespace: unused(trailing_whitespace),
345	_closing_bracket: unused(closing_bracket),
346	})
347	}
348
349	/// Parse a nested format description. The location provided is the the most recent one consumed.
350	fn parse_nested<'a, I: Iterator<Item = Result<lexer::Token<'a>, Error>>, const VERSION: usize>(
351	last_location: Location,
352	tokens: &mut lexer::Lexed<I>,
353	) -> Result<NestedFormatDescription<'a>, Error> {
354	validate_version!(VERSION);
355	let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
356	return Err(Error {
357	_inner: unused(last_location.error("expected opening bracket")),
358	public: crate::error::InvalidFormatDescription::Expected {
359	what: "opening bracket",
360	index: last_location.byte as _,
361	},
362	});
363	};
364	let items = parse_inner::<_, `true`, VERSION>(tokens).collect::<Result<_, _>>()?;
365	let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
366	return Err(Error {
367	_inner: unused(opening_bracket.error("unclosed bracket")),
368	public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
369	index: opening_bracket.byte as _,
370	},
371	});
372	};
373	let trailing_whitespace = tokens.next_if_whitespace();
374
375	Ok(NestedFormatDescription {
376	_opening_bracket: unused(opening_bracket),
377	items,
378	_closing_bracket: unused(closing_bracket),
379	_trailing_whitespace: unused(trailing_whitespace),
380	})
381	}
382