//! AST for parsing format descriptions.

use alloc::boxed::Box;
use alloc::string::String;
use alloc::vec::Vec;
use core::iter;

use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused};

10/// One part of a complete format description.
11pub(super) enum Item<'a> {
12 /// A literal string, formatted and parsed as-is.
13 ///
14 /// This should never be present inside a nested format description.
15 Literal(Spanned<&'a [u8]>),
16 /// A sequence of brackets. The first acts as the escape character.
17 ///
18 /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`.
19 EscapedBracket {
20 /// The first bracket.
21 _first: Unused<Location>,
22 /// The second bracket.
23 _second: Unused<Location>,
24 },
25 /// Part of a type, along with its modifiers.
26 Component {
27 /// Where the opening bracket was in the format string.
28 _opening_bracket: Unused<Location>,
29 /// Whitespace between the opening bracket and name.
30 _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
31 /// The name of the component.
32 name: Spanned<&'a [u8]>,
33 /// The modifiers for the component.
34 modifiers: Box<[Modifier<'a>]>,
35 /// Whitespace between the modifiers and closing bracket.
36 _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
37 /// Where the closing bracket was in the format string.
38 _closing_bracket: Unused<Location>,
39 },
40 /// An optional sequence of items.
41 Optional {
42 /// Where the opening bracket was in the format string.
43 opening_bracket: Location,
44 /// Whitespace between the opening bracket and "optional".
45 _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
46 /// The "optional" keyword.
47 _optional_kw: Unused<Spanned<&'a [u8]>>,
48 /// Whitespace between the "optional" keyword and the opening bracket.
49 _whitespace: Unused<Spanned<&'a [u8]>>,
50 /// The items within the optional sequence.
51 nested_format_description: NestedFormatDescription<'a>,
52 /// Where the closing bracket was in the format string.
53 closing_bracket: Location,
54 },
55 /// The first matching parse of a sequence of items.
56 First {
57 /// Where the opening bracket was in the format string.
58 opening_bracket: Location,
59 /// Whitespace between the opening bracket and "first".
60 _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
61 /// The "first" keyword.
62 _first_kw: Unused<Spanned<&'a [u8]>>,
63 /// Whitespace between the "first" keyword and the opening bracket.
64 _whitespace: Unused<Spanned<&'a [u8]>>,
65 /// The sequences of items to try.
66 nested_format_descriptions: Box<[NestedFormatDescription<'a>]>,
67 /// Where the closing bracket was in the format string.
68 closing_bracket: Location,
69 },
70}
71
72/// A format description that is nested within another format description.
73pub(super) struct NestedFormatDescription<'a> {
74 /// Where the opening bracket was in the format string.
75 pub(super) _opening_bracket: Unused<Location>,
76 /// The items within the nested format description.
77 pub(super) items: Box<[Item<'a>]>,
78 /// Where the closing bracket was in the format string.
79 pub(super) _closing_bracket: Unused<Location>,
80 /// Whitespace between the closing bracket and the next item.
81 pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>,
82}
83
84/// A modifier for a component.
85pub(super) struct Modifier<'a> {
86 /// Whitespace preceding the modifier.
87 pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>,
88 /// The key of the modifier.
89 pub(super) key: Spanned<&'a [u8]>,
90 /// Where the colon of the modifier was in the format string.
91 pub(super) _colon: Unused<Location>,
92 /// The value of the modifier.
93 pub(super) value: Spanned<&'a [u8]>,
94}
95
96/// Parse the provided tokens into an AST.
97pub(super) fn parse<
98 'item: 'iter,
99 'iter,
100 I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
101 const VERSION: usize,
102>(
103 tokens: &'iter mut lexer::Lexed<I>,
104) -> impl Iterator<Item = Result<Item<'item>, Error>> + 'iter {
105 validate_version!(VERSION);
106 parse_inner::<_, false, VERSION>(tokens)
107}
108
109/// Parse the provided tokens into an AST. The const generic indicates whether the resulting
110/// [`Item`] will be used directly or as part of a [`NestedFormatDescription`].
111fn parse_inner<
112 'item,
113 I: Iterator<Item = Result<lexer::Token<'item>, Error>>,
114 const NESTED: bool,
115 const VERSION: usize,
116>(
117 tokens: &mut lexer::Lexed<I>,
118) -> impl Iterator<Item = Result<Item<'item>, Error>> + '_ {
119 validate_version!(VERSION);
120 iter::from_fn(move || {
121 if NESTED && tokens.peek_closing_bracket().is_some() {
122 return None;
123 }
124
125 let next = match tokens.next()? {
126 Ok(token) => token,
127 Err(err) => return Some(Err(err)),
128 };
129
130 Some(match next {
131 lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => {
132 bug!("literal should not be present in nested description")
133 }
134 lexer::Token::Literal(value) => Ok(Item::Literal(value)),
135 lexer::Token::Bracket {
136 kind: lexer::BracketKind::Opening,
137 location,
138 } => {
139 if version!(..=1) {
140 if let Some(second_location) = tokens.next_if_opening_bracket() {
141 Ok(Item::EscapedBracket {
142 _first: unused(location),
143 _second: unused(second_location),
144 })
145 } else {
146 parse_component::<_, VERSION>(location, tokens)
147 }
148 } else {
149 parse_component::<_, VERSION>(location, tokens)
150 }
151 }
152 lexer::Token::Bracket {
153 kind: lexer::BracketKind::Closing,
154 location: _,
155 } if NESTED => {
156 bug!("closing bracket should be caught by the `if` statement")
157 }
158 lexer::Token::Bracket {
159 kind: lexer::BracketKind::Closing,
160 location: _,
161 } => {
162 bug!("closing bracket should have been consumed by `parse_component`")
163 }
164 lexer::Token::ComponentPart {
165 kind: _, // whitespace is significant in nested components
166 value,
167 } if NESTED => Ok(Item::Literal(value)),
168 lexer::Token::ComponentPart { kind: _, value: _ } => {
169 bug!("component part should have been consumed by `parse_component`")
170 }
171 })
172 })
173}
174
175/// Parse a component. This assumes that the opening bracket has already been consumed.
176fn parse_component<
177 'a,
178 I: Iterator<Item = Result<lexer::Token<'a>, Error>>,
179 const VERSION: usize,
180>(
181 opening_bracket: Location,
182 tokens: &mut lexer::Lexed<I>,
183) -> Result<Item<'a>, Error> {
184 validate_version!(VERSION);
185 let leading_whitespace = tokens.next_if_whitespace();
186
187 let Some(name) = tokens.next_if_not_whitespace() else {
188 let span = match leading_whitespace {
189 Some(Spanned { value: _, span }) => span,
190 None => opening_bracket.to(opening_bracket),
191 };
192 return Err(Error {
193 _inner: unused(span.error("expected component name")),
194 public: crate::error::InvalidFormatDescription::MissingComponentName {
195 index: span.start.byte as _,
196 },
197 });
198 };
199
200 if *name == b"optional" {
201 let Some(whitespace) = tokens.next_if_whitespace() else {
202 return Err(Error {
203 _inner: unused(name.span.error("expected whitespace after `optional`")),
204 public: crate::error::InvalidFormatDescription::Expected {
205 what: "whitespace after `optional`",
206 index: name.span.end.byte as _,
207 },
208 });
209 };
210
211 let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?;
212
213 let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
214 return Err(Error {
215 _inner: unused(opening_bracket.error("unclosed bracket")),
216 public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
217 index: opening_bracket.byte as _,
218 },
219 });
220 };
221
222 return Ok(Item::Optional {
223 opening_bracket,
224 _leading_whitespace: unused(leading_whitespace),
225 _optional_kw: unused(name),
226 _whitespace: unused(whitespace),
227 nested_format_description: nested,
228 closing_bracket,
229 });
230 }
231
232 if *name == b"first" {
233 let Some(whitespace) = tokens.next_if_whitespace() else {
234 return Err(Error {
235 _inner: unused(name.span.error("expected whitespace after `first`")),
236 public: crate::error::InvalidFormatDescription::Expected {
237 what: "whitespace after `first`",
238 index: name.span.end.byte as _,
239 },
240 });
241 };
242
243 let mut nested_format_descriptions = Vec::new();
244 while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) {
245 nested_format_descriptions.push(description);
246 }
247
248 let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
249 return Err(Error {
250 _inner: unused(opening_bracket.error("unclosed bracket")),
251 public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
252 index: opening_bracket.byte as _,
253 },
254 });
255 };
256
257 return Ok(Item::First {
258 opening_bracket,
259 _leading_whitespace: unused(leading_whitespace),
260 _first_kw: unused(name),
261 _whitespace: unused(whitespace),
262 nested_format_descriptions: nested_format_descriptions.into_boxed_slice(),
263 closing_bracket,
264 });
265 }
266
267 let mut modifiers = Vec::new();
268 let trailing_whitespace = loop {
269 let Some(whitespace) = tokens.next_if_whitespace() else { break None };
270
271 // This is not necessary for proper parsing, but provides a much better error when a nested
272 // description is used where it's not allowed.
273 if let Some(location) = tokens.next_if_opening_bracket() {
274 return Err(Error {
275 _inner: unused(
276 location
277 .to(location)
278 .error("modifier must be of the form `key:value`"),
279 ),
280 public: crate::error::InvalidFormatDescription::InvalidModifier {
281 value: String::from("["),
282 index: location.byte as _,
283 },
284 });
285 }
286
287 let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else {
288 break Some(whitespace);
289 };
290
291 let Some(colon_index) = value.iter().position(|&b| b == b':') else {
292 return Err(Error {
293 _inner: unused(span.error("modifier must be of the form `key:value`")),
294 public: crate::error::InvalidFormatDescription::InvalidModifier {
295 value: String::from_utf8_lossy(value).into_owned(),
296 index: span.start.byte as _,
297 },
298 });
299 };
300 let key = &value[..colon_index];
301 let value = &value[colon_index + 1..];
302
303 if key.is_empty() {
304 return Err(Error {
305 _inner: unused(span.shrink_to_start().error("expected modifier key")),
306 public: crate::error::InvalidFormatDescription::InvalidModifier {
307 value: String::new(),
308 index: span.start.byte as _,
309 },
310 });
311 }
312 if value.is_empty() {
313 return Err(Error {
314 _inner: unused(span.shrink_to_end().error("expected modifier value")),
315 public: crate::error::InvalidFormatDescription::InvalidModifier {
316 value: String::new(),
317 index: span.shrink_to_end().start.byte as _,
318 },
319 });
320 }
321
322 modifiers.push(Modifier {
323 _leading_whitespace: unused(whitespace),
324 key: key.spanned(span.shrink_to_before(colon_index as _)),
325 _colon: unused(span.start.offset(colon_index as _)),
326 value: value.spanned(span.shrink_to_after(colon_index as _)),
327 });
328 };
329
330 let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
331 return Err(Error {
332 _inner: unused(opening_bracket.error("unclosed bracket")),
333 public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
334 index: opening_bracket.byte as _,
335 },
336 });
337 };
338
339 Ok(Item::Component {
340 _opening_bracket: unused(opening_bracket),
341 _leading_whitespace: unused(leading_whitespace),
342 name,
343 modifiers: modifiers.into_boxed_slice(),
344 _trailing_whitespace: unused(trailing_whitespace),
345 _closing_bracket: unused(closing_bracket),
346 })
347}
348
349/// Parse a nested format description. The location provided is the the most recent one consumed.
350fn parse_nested<'a, I: Iterator<Item = Result<lexer::Token<'a>, Error>>, const VERSION: usize>(
351 last_location: Location,
352 tokens: &mut lexer::Lexed<I>,
353) -> Result<NestedFormatDescription<'a>, Error> {
354 validate_version!(VERSION);
355 let Some(opening_bracket) = tokens.next_if_opening_bracket() else {
356 return Err(Error {
357 _inner: unused(last_location.error("expected opening bracket")),
358 public: crate::error::InvalidFormatDescription::Expected {
359 what: "opening bracket",
360 index: last_location.byte as _,
361 },
362 });
363 };
364 let items = parse_inner::<_, true, VERSION>(tokens).collect::<Result<_, _>>()?;
365 let Some(closing_bracket) = tokens.next_if_closing_bracket() else {
366 return Err(Error {
367 _inner: unused(opening_bracket.error("unclosed bracket")),
368 public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
369 index: opening_bracket.byte as _,
370 },
371 });
372 };
373 let trailing_whitespace = tokens.next_if_whitespace();
374
375 Ok(NestedFormatDescription {
376 _opening_bracket: unused(opening_bracket),
377 items,
378 _closing_bracket: unused(closing_bracket),
379 _trailing_whitespace: unused(trailing_whitespace),
380 })
381}
382