1 | //! AST for parsing format descriptions. |
2 | |
3 | use alloc::boxed::Box; |
4 | use alloc::string::String; |
5 | use alloc::vec::Vec; |
6 | use core::iter; |
7 | |
8 | use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused}; |
9 | use crate::internal_macros::bug; |
10 | |
11 | /// One part of a complete format description. |
12 | pub(super) enum Item<'a> { |
13 | /// A literal string, formatted and parsed as-is. |
14 | /// |
15 | /// This should never be present inside a nested format description. |
16 | Literal(Spanned<&'a [u8]>), |
17 | /// A sequence of brackets. The first acts as the escape character. |
18 | /// |
19 | /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`. |
20 | EscapedBracket { |
21 | /// The first bracket. |
22 | _first: Unused<Location>, |
23 | /// The second bracket. |
24 | _second: Unused<Location>, |
25 | }, |
26 | /// Part of a type, along with its modifiers. |
27 | Component { |
28 | /// Where the opening bracket was in the format string. |
29 | _opening_bracket: Unused<Location>, |
30 | /// Whitespace between the opening bracket and name. |
31 | _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
32 | /// The name of the component. |
33 | name: Spanned<&'a [u8]>, |
34 | /// The modifiers for the component. |
35 | modifiers: Box<[Modifier<'a>]>, |
36 | /// Whitespace between the modifiers and closing bracket. |
37 | _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
38 | /// Where the closing bracket was in the format string. |
39 | _closing_bracket: Unused<Location>, |
40 | }, |
41 | /// An optional sequence of items. |
42 | Optional { |
43 | /// Where the opening bracket was in the format string. |
44 | opening_bracket: Location, |
45 | /// Whitespace between the opening bracket and "optional". |
46 | _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
47 | /// The "optional" keyword. |
48 | _optional_kw: Unused<Spanned<&'a [u8]>>, |
49 | /// Whitespace between the "optional" keyword and the opening bracket. |
50 | _whitespace: Unused<Spanned<&'a [u8]>>, |
51 | /// The items within the optional sequence. |
52 | nested_format_description: NestedFormatDescription<'a>, |
53 | /// Where the closing bracket was in the format string. |
54 | closing_bracket: Location, |
55 | }, |
56 | /// The first matching parse of a sequence of items. |
57 | First { |
58 | /// Where the opening bracket was in the format string. |
59 | opening_bracket: Location, |
60 | /// Whitespace between the opening bracket and "first". |
61 | _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
62 | /// The "first" keyword. |
63 | _first_kw: Unused<Spanned<&'a [u8]>>, |
64 | /// Whitespace between the "first" keyword and the opening bracket. |
65 | _whitespace: Unused<Spanned<&'a [u8]>>, |
66 | /// The sequences of items to try. |
67 | nested_format_descriptions: Box<[NestedFormatDescription<'a>]>, |
68 | /// Where the closing bracket was in the format string. |
69 | closing_bracket: Location, |
70 | }, |
71 | } |
72 | |
73 | /// A format description that is nested within another format description. |
74 | pub(super) struct NestedFormatDescription<'a> { |
75 | /// Where the opening bracket was in the format string. |
76 | pub(super) _opening_bracket: Unused<Location>, |
77 | /// The items within the nested format description. |
78 | pub(super) items: Box<[Item<'a>]>, |
79 | /// Where the closing bracket was in the format string. |
80 | pub(super) _closing_bracket: Unused<Location>, |
81 | /// Whitespace between the closing bracket and the next item. |
82 | pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
83 | } |
84 | |
85 | /// A modifier for a component. |
86 | pub(super) struct Modifier<'a> { |
87 | /// Whitespace preceding the modifier. |
88 | pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>, |
89 | /// The key of the modifier. |
90 | pub(super) key: Spanned<&'a [u8]>, |
91 | /// Where the colon of the modifier was in the format string. |
92 | pub(super) _colon: Unused<Location>, |
93 | /// The value of the modifier. |
94 | pub(super) value: Spanned<&'a [u8]>, |
95 | } |
96 | |
97 | /// Parse the provided tokens into an AST. |
98 | pub(super) fn parse< |
99 | 'item: 'iter, |
100 | 'iter, |
101 | I: Iterator<Item = Result<lexer::Token<'item>, Error>>, |
102 | const VERSION: usize, |
103 | >( |
104 | tokens: &'iter mut lexer::Lexed<I>, |
105 | ) -> impl Iterator<Item = Result<Item<'item>, Error>> + 'iter { |
106 | validate_version!(VERSION); |
107 | parse_inner::<_, false, VERSION>(tokens) |
108 | } |
109 | |
110 | /// Parse the provided tokens into an AST. The const generic indicates whether the resulting |
111 | /// [`Item`] will be used directly or as part of a [`NestedFormatDescription`]. |
112 | fn parse_inner< |
113 | 'item, |
114 | I: Iterator<Item = Result<lexer::Token<'item>, Error>>, |
115 | const NESTED: bool, |
116 | const VERSION: usize, |
117 | >( |
118 | tokens: &mut lexer::Lexed<I>, |
119 | ) -> impl Iterator<Item = Result<Item<'item>, Error>> + '_ { |
120 | validate_version!(VERSION); |
121 | iter::from_fn(move || { |
122 | if NESTED && tokens.peek_closing_bracket().is_some() { |
123 | return None; |
124 | } |
125 | |
126 | let next = match tokens.next()? { |
127 | Ok(token) => token, |
128 | Err(err) => return Some(Err(err)), |
129 | }; |
130 | |
131 | Some(match next { |
132 | lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => { |
133 | bug!("literal should not be present in nested description" ) |
134 | } |
135 | lexer::Token::Literal(value) => Ok(Item::Literal(value)), |
136 | lexer::Token::Bracket { |
137 | kind: lexer::BracketKind::Opening, |
138 | location, |
139 | } => { |
140 | if version!(..=1) { |
141 | if let Some(second_location) = tokens.next_if_opening_bracket() { |
142 | Ok(Item::EscapedBracket { |
143 | _first: unused(location), |
144 | _second: unused(second_location), |
145 | }) |
146 | } else { |
147 | parse_component::<_, VERSION>(location, tokens) |
148 | } |
149 | } else { |
150 | parse_component::<_, VERSION>(location, tokens) |
151 | } |
152 | } |
153 | lexer::Token::Bracket { |
154 | kind: lexer::BracketKind::Closing, |
155 | location: _, |
156 | } if NESTED => { |
157 | bug!("closing bracket should be caught by the `if` statement" ) |
158 | } |
159 | lexer::Token::Bracket { |
160 | kind: lexer::BracketKind::Closing, |
161 | location: _, |
162 | } => { |
163 | bug!("closing bracket should have been consumed by `parse_component`" ) |
164 | } |
165 | lexer::Token::ComponentPart { |
166 | kind: _, // whitespace is significant in nested components |
167 | value, |
168 | } if NESTED => Ok(Item::Literal(value)), |
169 | lexer::Token::ComponentPart { kind: _, value: _ } => { |
170 | bug!("component part should have been consumed by `parse_component`" ) |
171 | } |
172 | }) |
173 | }) |
174 | } |
175 | |
176 | /// Parse a component. This assumes that the opening bracket has already been consumed. |
177 | fn parse_component< |
178 | 'a, |
179 | I: Iterator<Item = Result<lexer::Token<'a>, Error>>, |
180 | const VERSION: usize, |
181 | >( |
182 | opening_bracket: Location, |
183 | tokens: &mut lexer::Lexed<I>, |
184 | ) -> Result<Item<'a>, Error> { |
185 | validate_version!(VERSION); |
186 | let leading_whitespace = tokens.next_if_whitespace(); |
187 | |
188 | let Some(name) = tokens.next_if_not_whitespace() else { |
189 | let span = match leading_whitespace { |
190 | Some(Spanned { value: _, span }) => span, |
191 | None => opening_bracket.to_self(), |
192 | }; |
193 | return Err(Error { |
194 | _inner: unused(span.error("expected component name" )), |
195 | public: crate::error::InvalidFormatDescription::MissingComponentName { |
196 | index: span.start.byte as _, |
197 | }, |
198 | }); |
199 | }; |
200 | |
201 | if *name == b"optional" { |
202 | let Some(whitespace) = tokens.next_if_whitespace() else { |
203 | return Err(Error { |
204 | _inner: unused(name.span.error("expected whitespace after `optional`" )), |
205 | public: crate::error::InvalidFormatDescription::Expected { |
206 | what: "whitespace after `optional`" , |
207 | index: name.span.end.byte as _, |
208 | }, |
209 | }); |
210 | }; |
211 | |
212 | let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?; |
213 | |
214 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
215 | return Err(Error { |
216 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
217 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
218 | index: opening_bracket.byte as _, |
219 | }, |
220 | }); |
221 | }; |
222 | |
223 | return Ok(Item::Optional { |
224 | opening_bracket, |
225 | _leading_whitespace: unused(leading_whitespace), |
226 | _optional_kw: unused(name), |
227 | _whitespace: unused(whitespace), |
228 | nested_format_description: nested, |
229 | closing_bracket, |
230 | }); |
231 | } |
232 | |
233 | if *name == b"first" { |
234 | let Some(whitespace) = tokens.next_if_whitespace() else { |
235 | return Err(Error { |
236 | _inner: unused(name.span.error("expected whitespace after `first`" )), |
237 | public: crate::error::InvalidFormatDescription::Expected { |
238 | what: "whitespace after `first`" , |
239 | index: name.span.end.byte as _, |
240 | }, |
241 | }); |
242 | }; |
243 | |
244 | let mut nested_format_descriptions = Vec::new(); |
245 | while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) { |
246 | nested_format_descriptions.push(description); |
247 | } |
248 | |
249 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
250 | return Err(Error { |
251 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
252 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
253 | index: opening_bracket.byte as _, |
254 | }, |
255 | }); |
256 | }; |
257 | |
258 | return Ok(Item::First { |
259 | opening_bracket, |
260 | _leading_whitespace: unused(leading_whitespace), |
261 | _first_kw: unused(name), |
262 | _whitespace: unused(whitespace), |
263 | nested_format_descriptions: nested_format_descriptions.into_boxed_slice(), |
264 | closing_bracket, |
265 | }); |
266 | } |
267 | |
268 | let mut modifiers = Vec::new(); |
269 | let trailing_whitespace = loop { |
270 | let Some(whitespace) = tokens.next_if_whitespace() else { |
271 | break None; |
272 | }; |
273 | |
274 | // This is not necessary for proper parsing, but provides a much better error when a nested |
275 | // description is used where it's not allowed. |
276 | if let Some(location) = tokens.next_if_opening_bracket() { |
277 | return Err(Error { |
278 | _inner: unused( |
279 | location |
280 | .to_self() |
281 | .error("modifier must be of the form `key:value`" ), |
282 | ), |
283 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
284 | value: String::from("[" ), |
285 | index: location.byte as _, |
286 | }, |
287 | }); |
288 | } |
289 | |
290 | let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else { |
291 | break Some(whitespace); |
292 | }; |
293 | |
294 | let Some(colon_index) = value.iter().position(|&b| b == b':' ) else { |
295 | return Err(Error { |
296 | _inner: unused(span.error("modifier must be of the form `key:value`" )), |
297 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
298 | value: String::from_utf8_lossy(value).into_owned(), |
299 | index: span.start.byte as _, |
300 | }, |
301 | }); |
302 | }; |
303 | let key = &value[..colon_index]; |
304 | let value = &value[colon_index + 1..]; |
305 | |
306 | if key.is_empty() { |
307 | return Err(Error { |
308 | _inner: unused(span.shrink_to_start().error("expected modifier key" )), |
309 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
310 | value: String::new(), |
311 | index: span.start.byte as _, |
312 | }, |
313 | }); |
314 | } |
315 | if value.is_empty() { |
316 | return Err(Error { |
317 | _inner: unused(span.shrink_to_end().error("expected modifier value" )), |
318 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
319 | value: String::new(), |
320 | index: span.shrink_to_end().start.byte as _, |
321 | }, |
322 | }); |
323 | } |
324 | |
325 | modifiers.push(Modifier { |
326 | _leading_whitespace: unused(whitespace), |
327 | key: key.spanned(span.shrink_to_before(colon_index as _)), |
328 | _colon: unused(span.start.offset(colon_index as _)), |
329 | value: value.spanned(span.shrink_to_after(colon_index as _)), |
330 | }); |
331 | }; |
332 | |
333 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
334 | return Err(Error { |
335 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
336 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
337 | index: opening_bracket.byte as _, |
338 | }, |
339 | }); |
340 | }; |
341 | |
342 | Ok(Item::Component { |
343 | _opening_bracket: unused(opening_bracket), |
344 | _leading_whitespace: unused(leading_whitespace), |
345 | name, |
346 | modifiers: modifiers.into_boxed_slice(), |
347 | _trailing_whitespace: unused(trailing_whitespace), |
348 | _closing_bracket: unused(closing_bracket), |
349 | }) |
350 | } |
351 | |
352 | /// Parse a nested format description. The location provided is the the most recent one consumed. |
353 | fn parse_nested<'a, I: Iterator<Item = Result<lexer::Token<'a>, Error>>, const VERSION: usize>( |
354 | last_location: Location, |
355 | tokens: &mut lexer::Lexed<I>, |
356 | ) -> Result<NestedFormatDescription<'a>, Error> { |
357 | validate_version!(VERSION); |
358 | let Some(opening_bracket) = tokens.next_if_opening_bracket() else { |
359 | return Err(Error { |
360 | _inner: unused(last_location.error("expected opening bracket" )), |
361 | public: crate::error::InvalidFormatDescription::Expected { |
362 | what: "opening bracket" , |
363 | index: last_location.byte as _, |
364 | }, |
365 | }); |
366 | }; |
367 | let items = parse_inner::<_, true, VERSION>(tokens).collect::<Result<_, _>>()?; |
368 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
369 | return Err(Error { |
370 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
371 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
372 | index: opening_bracket.byte as _, |
373 | }, |
374 | }); |
375 | }; |
376 | let trailing_whitespace = tokens.next_if_whitespace(); |
377 | |
378 | Ok(NestedFormatDescription { |
379 | _opening_bracket: unused(opening_bracket), |
380 | items, |
381 | _closing_bracket: unused(closing_bracket), |
382 | _trailing_whitespace: unused(trailing_whitespace), |
383 | }) |
384 | } |
385 | |