| 1 | //! AST for parsing format descriptions. |
| 2 | |
| 3 | use alloc::boxed::Box; |
| 4 | use alloc::string::String; |
| 5 | use alloc::vec::Vec; |
| 6 | use core::iter; |
| 7 | |
| 8 | use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused}; |
| 9 | use crate::internal_macros::bug; |
| 10 | |
| 11 | /// One part of a complete format description. |
| 12 | pub(super) enum Item<'a> { |
| 13 | /// A literal string, formatted and parsed as-is. |
| 14 | /// |
| 15 | /// This should never be present inside a nested format description. |
| 16 | Literal(Spanned<&'a [u8]>), |
| 17 | /// A sequence of brackets. The first acts as the escape character. |
| 18 | /// |
| 19 | /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`. |
| 20 | EscapedBracket { |
| 21 | /// The first bracket. |
| 22 | _first: Unused<Location>, |
| 23 | /// The second bracket. |
| 24 | _second: Unused<Location>, |
| 25 | }, |
| 26 | /// Part of a type, along with its modifiers. |
| 27 | Component { |
| 28 | /// Where the opening bracket was in the format string. |
| 29 | _opening_bracket: Unused<Location>, |
| 30 | /// Whitespace between the opening bracket and name. |
| 31 | _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
| 32 | /// The name of the component. |
| 33 | name: Spanned<&'a [u8]>, |
| 34 | /// The modifiers for the component. |
| 35 | modifiers: Box<[Modifier<'a>]>, |
| 36 | /// Whitespace between the modifiers and closing bracket. |
| 37 | _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
| 38 | /// Where the closing bracket was in the format string. |
| 39 | _closing_bracket: Unused<Location>, |
| 40 | }, |
| 41 | /// An optional sequence of items. |
| 42 | Optional { |
| 43 | /// Where the opening bracket was in the format string. |
| 44 | opening_bracket: Location, |
| 45 | /// Whitespace between the opening bracket and "optional". |
| 46 | _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
| 47 | /// The "optional" keyword. |
| 48 | _optional_kw: Unused<Spanned<&'a [u8]>>, |
| 49 | /// Whitespace between the "optional" keyword and the opening bracket. |
| 50 | _whitespace: Unused<Spanned<&'a [u8]>>, |
| 51 | /// The items within the optional sequence. |
| 52 | nested_format_description: NestedFormatDescription<'a>, |
| 53 | /// Where the closing bracket was in the format string. |
| 54 | closing_bracket: Location, |
| 55 | }, |
| 56 | /// The first matching parse of a sequence of items. |
| 57 | First { |
| 58 | /// Where the opening bracket was in the format string. |
| 59 | opening_bracket: Location, |
| 60 | /// Whitespace between the opening bracket and "first". |
| 61 | _leading_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
| 62 | /// The "first" keyword. |
| 63 | _first_kw: Unused<Spanned<&'a [u8]>>, |
| 64 | /// Whitespace between the "first" keyword and the opening bracket. |
| 65 | _whitespace: Unused<Spanned<&'a [u8]>>, |
| 66 | /// The sequences of items to try. |
| 67 | nested_format_descriptions: Box<[NestedFormatDescription<'a>]>, |
| 68 | /// Where the closing bracket was in the format string. |
| 69 | closing_bracket: Location, |
| 70 | }, |
| 71 | } |
| 72 | |
| 73 | /// A format description that is nested within another format description. |
| 74 | pub(super) struct NestedFormatDescription<'a> { |
| 75 | /// Where the opening bracket was in the format string. |
| 76 | pub(super) _opening_bracket: Unused<Location>, |
| 77 | /// The items within the nested format description. |
| 78 | pub(super) items: Box<[Item<'a>]>, |
| 79 | /// Where the closing bracket was in the format string. |
| 80 | pub(super) _closing_bracket: Unused<Location>, |
| 81 | /// Whitespace between the closing bracket and the next item. |
| 82 | pub(super) _trailing_whitespace: Unused<Option<Spanned<&'a [u8]>>>, |
| 83 | } |
| 84 | |
| 85 | /// A modifier for a component. |
| 86 | pub(super) struct Modifier<'a> { |
| 87 | /// Whitespace preceding the modifier. |
| 88 | pub(super) _leading_whitespace: Unused<Spanned<&'a [u8]>>, |
| 89 | /// The key of the modifier. |
| 90 | pub(super) key: Spanned<&'a [u8]>, |
| 91 | /// Where the colon of the modifier was in the format string. |
| 92 | pub(super) _colon: Unused<Location>, |
| 93 | /// The value of the modifier. |
| 94 | pub(super) value: Spanned<&'a [u8]>, |
| 95 | } |
| 96 | |
| 97 | /// Parse the provided tokens into an AST. |
| 98 | pub(super) fn parse< |
| 99 | 'item: 'iter, |
| 100 | 'iter, |
| 101 | I: Iterator<Item = Result<lexer::Token<'item>, Error>>, |
| 102 | const VERSION: usize, |
| 103 | >( |
| 104 | tokens: &'iter mut lexer::Lexed<I>, |
| 105 | ) -> impl Iterator<Item = Result<Item<'item>, Error>> + 'iter { |
| 106 | validate_version!(VERSION); |
| 107 | parse_inner::<_, false, VERSION>(tokens) |
| 108 | } |
| 109 | |
| 110 | /// Parse the provided tokens into an AST. The const generic indicates whether the resulting |
| 111 | /// [`Item`] will be used directly or as part of a [`NestedFormatDescription`]. |
| 112 | fn parse_inner< |
| 113 | 'item, |
| 114 | I: Iterator<Item = Result<lexer::Token<'item>, Error>>, |
| 115 | const NESTED: bool, |
| 116 | const VERSION: usize, |
| 117 | >( |
| 118 | tokens: &mut lexer::Lexed<I>, |
| 119 | ) -> impl Iterator<Item = Result<Item<'item>, Error>> + '_ { |
| 120 | validate_version!(VERSION); |
| 121 | iter::from_fn(move || { |
| 122 | if NESTED && tokens.peek_closing_bracket().is_some() { |
| 123 | return None; |
| 124 | } |
| 125 | |
| 126 | let next = match tokens.next()? { |
| 127 | Ok(token) => token, |
| 128 | Err(err) => return Some(Err(err)), |
| 129 | }; |
| 130 | |
| 131 | Some(match next { |
| 132 | lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => { |
| 133 | bug!("literal should not be present in nested description" ) |
| 134 | } |
| 135 | lexer::Token::Literal(value) => Ok(Item::Literal(value)), |
| 136 | lexer::Token::Bracket { |
| 137 | kind: lexer::BracketKind::Opening, |
| 138 | location, |
| 139 | } => { |
| 140 | if version!(..=1) { |
| 141 | if let Some(second_location) = tokens.next_if_opening_bracket() { |
| 142 | Ok(Item::EscapedBracket { |
| 143 | _first: unused(location), |
| 144 | _second: unused(second_location), |
| 145 | }) |
| 146 | } else { |
| 147 | parse_component::<_, VERSION>(location, tokens) |
| 148 | } |
| 149 | } else { |
| 150 | parse_component::<_, VERSION>(location, tokens) |
| 151 | } |
| 152 | } |
| 153 | lexer::Token::Bracket { |
| 154 | kind: lexer::BracketKind::Closing, |
| 155 | location: _, |
| 156 | } if NESTED => { |
| 157 | bug!("closing bracket should be caught by the `if` statement" ) |
| 158 | } |
| 159 | lexer::Token::Bracket { |
| 160 | kind: lexer::BracketKind::Closing, |
| 161 | location: _, |
| 162 | } => { |
| 163 | bug!("closing bracket should have been consumed by `parse_component`" ) |
| 164 | } |
| 165 | lexer::Token::ComponentPart { |
| 166 | kind: _, // whitespace is significant in nested components |
| 167 | value, |
| 168 | } if NESTED => Ok(Item::Literal(value)), |
| 169 | lexer::Token::ComponentPart { kind: _, value: _ } => { |
| 170 | bug!("component part should have been consumed by `parse_component`" ) |
| 171 | } |
| 172 | }) |
| 173 | }) |
| 174 | } |
| 175 | |
| 176 | /// Parse a component. This assumes that the opening bracket has already been consumed. |
| 177 | fn parse_component< |
| 178 | 'a, |
| 179 | I: Iterator<Item = Result<lexer::Token<'a>, Error>>, |
| 180 | const VERSION: usize, |
| 181 | >( |
| 182 | opening_bracket: Location, |
| 183 | tokens: &mut lexer::Lexed<I>, |
| 184 | ) -> Result<Item<'a>, Error> { |
| 185 | validate_version!(VERSION); |
| 186 | let leading_whitespace = tokens.next_if_whitespace(); |
| 187 | |
| 188 | let Some(name) = tokens.next_if_not_whitespace() else { |
| 189 | let span = match leading_whitespace { |
| 190 | Some(Spanned { value: _, span }) => span, |
| 191 | None => opening_bracket.to_self(), |
| 192 | }; |
| 193 | return Err(Error { |
| 194 | _inner: unused(span.error("expected component name" )), |
| 195 | public: crate::error::InvalidFormatDescription::MissingComponentName { |
| 196 | index: span.start.byte as _, |
| 197 | }, |
| 198 | }); |
| 199 | }; |
| 200 | |
| 201 | if *name == b"optional" { |
| 202 | let Some(whitespace) = tokens.next_if_whitespace() else { |
| 203 | return Err(Error { |
| 204 | _inner: unused(name.span.error("expected whitespace after `optional`" )), |
| 205 | public: crate::error::InvalidFormatDescription::Expected { |
| 206 | what: "whitespace after `optional`" , |
| 207 | index: name.span.end.byte as _, |
| 208 | }, |
| 209 | }); |
| 210 | }; |
| 211 | |
| 212 | let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?; |
| 213 | |
| 214 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
| 215 | return Err(Error { |
| 216 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
| 217 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
| 218 | index: opening_bracket.byte as _, |
| 219 | }, |
| 220 | }); |
| 221 | }; |
| 222 | |
| 223 | return Ok(Item::Optional { |
| 224 | opening_bracket, |
| 225 | _leading_whitespace: unused(leading_whitespace), |
| 226 | _optional_kw: unused(name), |
| 227 | _whitespace: unused(whitespace), |
| 228 | nested_format_description: nested, |
| 229 | closing_bracket, |
| 230 | }); |
| 231 | } |
| 232 | |
| 233 | if *name == b"first" { |
| 234 | let Some(whitespace) = tokens.next_if_whitespace() else { |
| 235 | return Err(Error { |
| 236 | _inner: unused(name.span.error("expected whitespace after `first`" )), |
| 237 | public: crate::error::InvalidFormatDescription::Expected { |
| 238 | what: "whitespace after `first`" , |
| 239 | index: name.span.end.byte as _, |
| 240 | }, |
| 241 | }); |
| 242 | }; |
| 243 | |
| 244 | let mut nested_format_descriptions = Vec::new(); |
| 245 | while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) { |
| 246 | nested_format_descriptions.push(description); |
| 247 | } |
| 248 | |
| 249 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
| 250 | return Err(Error { |
| 251 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
| 252 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
| 253 | index: opening_bracket.byte as _, |
| 254 | }, |
| 255 | }); |
| 256 | }; |
| 257 | |
| 258 | return Ok(Item::First { |
| 259 | opening_bracket, |
| 260 | _leading_whitespace: unused(leading_whitespace), |
| 261 | _first_kw: unused(name), |
| 262 | _whitespace: unused(whitespace), |
| 263 | nested_format_descriptions: nested_format_descriptions.into_boxed_slice(), |
| 264 | closing_bracket, |
| 265 | }); |
| 266 | } |
| 267 | |
| 268 | let mut modifiers = Vec::new(); |
| 269 | let trailing_whitespace = loop { |
| 270 | let Some(whitespace) = tokens.next_if_whitespace() else { |
| 271 | break None; |
| 272 | }; |
| 273 | |
| 274 | // This is not necessary for proper parsing, but provides a much better error when a nested |
| 275 | // description is used where it's not allowed. |
| 276 | if let Some(location) = tokens.next_if_opening_bracket() { |
| 277 | return Err(Error { |
| 278 | _inner: unused( |
| 279 | location |
| 280 | .to_self() |
| 281 | .error("modifier must be of the form `key:value`" ), |
| 282 | ), |
| 283 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
| 284 | value: String::from("[" ), |
| 285 | index: location.byte as _, |
| 286 | }, |
| 287 | }); |
| 288 | } |
| 289 | |
| 290 | let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else { |
| 291 | break Some(whitespace); |
| 292 | }; |
| 293 | |
| 294 | let Some(colon_index) = value.iter().position(|&b| b == b':' ) else { |
| 295 | return Err(Error { |
| 296 | _inner: unused(span.error("modifier must be of the form `key:value`" )), |
| 297 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
| 298 | value: String::from_utf8_lossy(value).into_owned(), |
| 299 | index: span.start.byte as _, |
| 300 | }, |
| 301 | }); |
| 302 | }; |
| 303 | let key = &value[..colon_index]; |
| 304 | let value = &value[colon_index + 1..]; |
| 305 | |
| 306 | if key.is_empty() { |
| 307 | return Err(Error { |
| 308 | _inner: unused(span.shrink_to_start().error("expected modifier key" )), |
| 309 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
| 310 | value: String::new(), |
| 311 | index: span.start.byte as _, |
| 312 | }, |
| 313 | }); |
| 314 | } |
| 315 | if value.is_empty() { |
| 316 | return Err(Error { |
| 317 | _inner: unused(span.shrink_to_end().error("expected modifier value" )), |
| 318 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
| 319 | value: String::new(), |
| 320 | index: span.shrink_to_end().start.byte as _, |
| 321 | }, |
| 322 | }); |
| 323 | } |
| 324 | |
| 325 | modifiers.push(Modifier { |
| 326 | _leading_whitespace: unused(whitespace), |
| 327 | key: key.spanned(span.shrink_to_before(colon_index as _)), |
| 328 | _colon: unused(span.start.offset(colon_index as _)), |
| 329 | value: value.spanned(span.shrink_to_after(colon_index as _)), |
| 330 | }); |
| 331 | }; |
| 332 | |
| 333 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
| 334 | return Err(Error { |
| 335 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
| 336 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
| 337 | index: opening_bracket.byte as _, |
| 338 | }, |
| 339 | }); |
| 340 | }; |
| 341 | |
| 342 | Ok(Item::Component { |
| 343 | _opening_bracket: unused(opening_bracket), |
| 344 | _leading_whitespace: unused(leading_whitespace), |
| 345 | name, |
| 346 | modifiers: modifiers.into_boxed_slice(), |
| 347 | _trailing_whitespace: unused(trailing_whitespace), |
| 348 | _closing_bracket: unused(closing_bracket), |
| 349 | }) |
| 350 | } |
| 351 | |
| 352 | /// Parse a nested format description. The location provided is the the most recent one consumed. |
| 353 | fn parse_nested<'a, I: Iterator<Item = Result<lexer::Token<'a>, Error>>, const VERSION: usize>( |
| 354 | last_location: Location, |
| 355 | tokens: &mut lexer::Lexed<I>, |
| 356 | ) -> Result<NestedFormatDescription<'a>, Error> { |
| 357 | validate_version!(VERSION); |
| 358 | let Some(opening_bracket) = tokens.next_if_opening_bracket() else { |
| 359 | return Err(Error { |
| 360 | _inner: unused(last_location.error("expected opening bracket" )), |
| 361 | public: crate::error::InvalidFormatDescription::Expected { |
| 362 | what: "opening bracket" , |
| 363 | index: last_location.byte as _, |
| 364 | }, |
| 365 | }); |
| 366 | }; |
| 367 | let items = parse_inner::<_, true, VERSION>(tokens).collect::<Result<_, _>>()?; |
| 368 | let Some(closing_bracket) = tokens.next_if_closing_bracket() else { |
| 369 | return Err(Error { |
| 370 | _inner: unused(opening_bracket.error("unclosed bracket" )), |
| 371 | public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { |
| 372 | index: opening_bracket.byte as _, |
| 373 | }, |
| 374 | }); |
| 375 | }; |
| 376 | let trailing_whitespace = tokens.next_if_whitespace(); |
| 377 | |
| 378 | Ok(NestedFormatDescription { |
| 379 | _opening_bracket: unused(opening_bracket), |
| 380 | items, |
| 381 | _closing_bracket: unused(closing_bracket), |
| 382 | _trailing_whitespace: unused(trailing_whitespace), |
| 383 | }) |
| 384 | } |
| 385 | |