| 1 | //! Typed, validated representation of a parsed format description. |
| 2 | |
| 3 | use alloc::boxed::Box; |
| 4 | use alloc::string::String; |
| 5 | use core::num::NonZeroU16; |
| 6 | use core::str::{self, FromStr}; |
| 7 | |
| 8 | use super::{ast, unused, Error, Span, Spanned}; |
| 9 | use crate::internal_macros::bug; |
| 10 | |
| 11 | /// Parse an AST iterator into a sequence of format items. |
| 12 | pub(super) fn parse<'a>( |
| 13 | ast_items: impl Iterator<Item = Result<ast::Item<'a>, Error>>, |
| 14 | ) -> impl Iterator<Item = Result<Item<'a>, Error>> { |
| 15 | ast_items.map(|ast_item: Result- , Error>
| ast_item.and_then(op:Item::from_ast)) |
| 16 | } |
| 17 | |
/// A description of how to format and parse one part of a type.
///
/// Values of this type are produced from AST items by [`Item::from_ast`]. Nested descriptions
/// keep the span of the whole bracketed sequence so that later conversions can report where an
/// unsupported construct starts.
pub(super) enum Item<'a> {
    /// A literal string.
    Literal(&'a [u8]),
    /// Part of a type, along with its modifiers.
    Component(Component),
    /// A sequence of optional items.
    Optional {
        /// The items themselves.
        value: Box<[Self]>,
        /// The span of the full sequence.
        span: Span,
    },
    /// The first matching parse of a sequence of format descriptions.
    First {
        /// The sequence of format descriptions.
        value: Box<[Box<[Self]>]>,
        /// The span of the full sequence.
        span: Span,
    },
}
| 39 | |
| 40 | impl Item<'_> { |
| 41 | /// Parse an AST item into a format item. |
| 42 | pub(super) fn from_ast(ast_item: ast::Item<'_>) -> Result<Item<'_>, Error> { |
| 43 | Ok(match ast_item { |
| 44 | ast::Item::Component { |
| 45 | _opening_bracket: _, |
| 46 | _leading_whitespace: _, |
| 47 | name, |
| 48 | modifiers, |
| 49 | _trailing_whitespace: _, |
| 50 | _closing_bracket: _, |
| 51 | } => Item::Component(component_from_ast(&name, &modifiers)?), |
| 52 | ast::Item::Literal(Spanned { value, span: _ }) => Item::Literal(value), |
| 53 | ast::Item::EscapedBracket { |
| 54 | _first: _, |
| 55 | _second: _, |
| 56 | } => Item::Literal(b"[" ), |
| 57 | ast::Item::Optional { |
| 58 | opening_bracket, |
| 59 | _leading_whitespace: _, |
| 60 | _optional_kw: _, |
| 61 | _whitespace: _, |
| 62 | nested_format_description, |
| 63 | closing_bracket, |
| 64 | } => { |
| 65 | let items = nested_format_description |
| 66 | .items |
| 67 | .into_vec() |
| 68 | .into_iter() |
| 69 | .map(Item::from_ast) |
| 70 | .collect::<Result<_, _>>()?; |
| 71 | Item::Optional { |
| 72 | value: items, |
| 73 | span: opening_bracket.to(closing_bracket), |
| 74 | } |
| 75 | } |
| 76 | ast::Item::First { |
| 77 | opening_bracket, |
| 78 | _leading_whitespace: _, |
| 79 | _first_kw: _, |
| 80 | _whitespace: _, |
| 81 | nested_format_descriptions, |
| 82 | closing_bracket, |
| 83 | } => { |
| 84 | let items = nested_format_descriptions |
| 85 | .into_vec() |
| 86 | .into_iter() |
| 87 | .map(|nested_format_description| { |
| 88 | nested_format_description |
| 89 | .items |
| 90 | .into_vec() |
| 91 | .into_iter() |
| 92 | .map(Item::from_ast) |
| 93 | .collect() |
| 94 | }) |
| 95 | .collect::<Result<_, _>>()?; |
| 96 | Item::First { |
| 97 | value: items, |
| 98 | span: opening_bracket.to(closing_bracket), |
| 99 | } |
| 100 | } |
| 101 | }) |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | impl<'a> TryFrom<Item<'a>> for crate::format_description::BorrowedFormatItem<'a> { |
| 106 | type Error = Error; |
| 107 | |
| 108 | fn try_from(item: Item<'a>) -> Result<Self, Self::Error> { |
| 109 | match item { |
| 110 | Item::Literal(literal) => Ok(Self::Literal(literal)), |
| 111 | Item::Component(component) => Ok(Self::Component(component.into())), |
| 112 | Item::Optional { value: _, span } => Err(Error { |
| 113 | _inner: unused(span.error( |
| 114 | "optional items are not supported in runtime-parsed format descriptions" , |
| 115 | )), |
| 116 | public: crate::error::InvalidFormatDescription::NotSupported { |
| 117 | what: "optional item" , |
| 118 | context: "runtime-parsed format descriptions" , |
| 119 | index: span.start.byte as _, |
| 120 | }, |
| 121 | }), |
| 122 | Item::First { value: _, span } => Err(Error { |
| 123 | _inner: unused(span.error( |
| 124 | "'first' items are not supported in runtime-parsed format descriptions" , |
| 125 | )), |
| 126 | public: crate::error::InvalidFormatDescription::NotSupported { |
| 127 | what: "'first' item" , |
| 128 | context: "runtime-parsed format descriptions" , |
| 129 | index: span.start.byte as _, |
| 130 | }, |
| 131 | }), |
| 132 | } |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | impl From<Item<'_>> for crate::format_description::OwnedFormatItem { |
| 137 | fn from(item: Item<'_>) -> Self { |
| 138 | match item { |
| 139 | Item::Literal(literal: &[u8]) => Self::Literal(literal.to_vec().into_boxed_slice()), |
| 140 | Item::Component(component: Component) => Self::Component(component.into()), |
| 141 | Item::Optional { value: Box<[Item<'_>]>, span: _ } => Self::Optional(Box::new(value.into())), |
| 142 | Item::First { value: Box<[Box<[Item<'_>]>]>, span: _ } => { |
| 143 | Self::First(value.into_vec().into_iter().map(Into::into).collect()) |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | impl<'a> From<Box<[Item<'a>]>> for crate::format_description::OwnedFormatItem { |
| 150 | fn from(items: Box<[Item<'a>]>) -> Self { |
| 151 | let items: Vec- >
= items.into_vec(); |
| 152 | match <[_; 1]>::try_from(items) { |
| 153 | Ok([item: Item<'a>]) => item.into(), |
| 154 | Err(vec: Vec- >
) => Self::Compound(vec.into_iter().map(Into::into).collect()), |
| 155 | } |
| 156 | } |
| 157 | } |
| 158 | |
/// Declare the `Component` enum, one struct per variant, and the code that builds them.
///
/// For every variant this generates:
///
/// - a struct named after the variant that stores each modifier as an `Option`;
/// - a `with_modifiers` constructor on that struct, which fills the fields from AST modifiers;
/// - an arm of the conversion into the public `format_description::Component`.
///
/// It also generates `component_from_ast`, which picks the variant from the component name.
///
/// A field may be marked `#[required]`, in which case parsing fails when the modifier is
/// absent. A field type may be marked `#[from_str]`, in which case the value is parsed with
/// `parse_from_modifier_value` instead of the type's own `from_modifier_value`.
macro_rules! component_definition {
    // Internal helper rules: expand to the `then` tokens when the marker (`required` or
    // `from_str`) is present, and to the optional `else` tokens (or nothing) when it is not.
    (@if_required required then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($then)* };
    (@if_required then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($($else)*)? };
    (@if_from_str from_str then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($then)* };
    (@if_from_str then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($($else)*)? };

    // Main entry point. Each variant is written as
    // `Variant = "name" { field = "key": Option<Type> => target_field, ... }`.
    ($vis:vis enum $name:ident {
        $($variant:ident = $parse_variant:literal {$(
            $(#[$required:tt])?
            $field:ident = $parse_field:literal:
            Option<$(#[$from_str:tt])? $field_type:ty>
            => $target_field:ident
        ),* $(,)?}),* $(,)?
    }) => {
        // The enum wraps the per-variant structs declared just below.
        $vis enum $name {
            $($variant($variant),)*
        }

        $($vis struct $variant {
            $($field: Option<$field_type>),*
        })*

        $(impl $variant {
            /// Parse the component from the AST, given its modifiers.
            fn with_modifiers(
                modifiers: &[ast::Modifier<'_>],
                _component_span: Span,
            ) -> Result<Self, Error>
            {
                // rustc will complain about the unused `mut` if the component has no modifiers.
                #[allow(unused_mut)]
                let mut this = Self {
                    $($field: None),*
                };

                for modifier in modifiers {
                    // One `if` per declared field. A matching key stores the parsed value and
                    // moves on to the next modifier; a repeated key overwrites the earlier value.
                    $(#[allow(clippy::string_lit_as_bytes)]
                    if modifier.key.eq_ignore_ascii_case($parse_field.as_bytes()) {
                        this.$field = component_definition!(@if_from_str $($from_str)?
                            then {
                                parse_from_modifier_value::<$field_type>(&modifier.value)?
                            } else {
                                <$field_type>::from_modifier_value(&modifier.value)?
                            });
                        continue;
                    })*
                    // Only reached when no declared field matched the key.
                    return Err(Error {
                        _inner: unused(modifier.key.span.error("invalid modifier key")),
                        public: crate::error::InvalidFormatDescription::InvalidModifier {
                            value: String::from_utf8_lossy(*modifier.key).into_owned(),
                            index: modifier.key.span.start.byte as _,
                        }
                    });
                }

                // Emitted only for fields marked `#[required]`: reject the component if the
                // modifier was never supplied.
                $(component_definition! { @if_required $($required)? then {
                    if this.$field.is_none() {
                        return Err(Error {
                            _inner: unused(_component_span.error("missing required modifier")),
                            public:
                                crate::error::InvalidFormatDescription::MissingRequiredModifier {
                                    name: $parse_field,
                                    index: _component_span.start.byte as _,
                                }
                        });
                    }
                }})*

                Ok(this)
            }
        })*

        impl From<$name> for crate::format_description::Component {
            fn from(component: $name) -> Self {
                match component {$(
                    $name::$variant($variant { $($field),* }) => {
                        $crate::format_description::component::Component::$variant(
                            $crate::format_description::modifier::$variant {$(
                                // Required fields were checked in `with_modifiers`, so `None`
                                // here is treated as a bug. Optional fields fall back to the
                                // default of the internal type.
                                $target_field: component_definition! { @if_required $($required)?
                                    then {
                                        match $field {
                                            Some(value) => value.into(),
                                            None => bug!("required modifier was not set"),
                                        }
                                    } else {
                                        $field.unwrap_or_default().into()
                                    }
                                }
                            ),*}
                        )
                    }
                )*}
            }
        }

        /// Parse a component from the AST, given its name and modifiers.
        fn component_from_ast(
            name: &Spanned<&[u8]>,
            modifiers: &[ast::Modifier<'_>],
        ) -> Result<Component, Error> {
            // One `if` per declared variant; the first name that matches wins.
            $(#[allow(clippy::string_lit_as_bytes)]
            if name.eq_ignore_ascii_case($parse_variant.as_bytes()) {
                return Ok(Component::$variant($variant::with_modifiers(&modifiers, name.span)?));
            })*
            Err(Error {
                _inner: unused(name.span.error("invalid component")),
                public: crate::error::InvalidFormatDescription::InvalidComponentName {
                    name: String::from_utf8_lossy(name).into_owned(),
                    index: name.span.start.byte as _,
                },
            })
        }
    }
}
| 274 | |
// Keep in alphabetical order.
//
// Each entry has the form
// `Variant = "component name" { field = "modifier key": Option<InternalType> => target_field }`.
// `#[required]` marks a modifier that must be supplied; `#[from_str]` marks a value that is
// parsed with `FromStr` rather than matched against a fixed set of strings.
component_definition! {
    pub(super) enum Component {
        Day = "day" {
            padding = "padding": Option<Padding> => padding,
        },
        // Takes no modifiers.
        End = "end" {},
        Hour = "hour" {
            padding = "padding": Option<Padding> => padding,
            base = "repr": Option<HourBase> => is_12_hour_clock,
        },
        Ignore = "ignore" {
            #[required]
            count = "count": Option<#[from_str] NonZeroU16> => count,
        },
        Minute = "minute" {
            padding = "padding": Option<Padding> => padding,
        },
        Month = "month" {
            padding = "padding": Option<Padding> => padding,
            repr = "repr": Option<MonthRepr> => repr,
            case_sensitive = "case_sensitive": Option<MonthCaseSensitive> => case_sensitive,
        },
        OffsetHour = "offset_hour" {
            sign_behavior = "sign": Option<SignBehavior> => sign_is_mandatory,
            padding = "padding": Option<Padding> => padding,
        },
        OffsetMinute = "offset_minute" {
            padding = "padding": Option<Padding> => padding,
        },
        OffsetSecond = "offset_second" {
            padding = "padding": Option<Padding> => padding,
        },
        Ordinal = "ordinal" {
            padding = "padding": Option<Padding> => padding,
        },
        Period = "period" {
            case = "case": Option<PeriodCase> => is_uppercase,
            case_sensitive = "case_sensitive": Option<PeriodCaseSensitive> => case_sensitive,
        },
        Second = "second" {
            padding = "padding": Option<Padding> => padding,
        },
        Subsecond = "subsecond" {
            digits = "digits": Option<SubsecondDigits> => digits,
        },
        UnixTimestamp = "unix_timestamp" {
            precision = "precision": Option<UnixTimestampPrecision> => precision,
            sign_behavior = "sign": Option<SignBehavior> => sign_is_mandatory,
        },
        Weekday = "weekday" {
            repr = "repr": Option<WeekdayRepr> => repr,
            one_indexed = "one_indexed": Option<WeekdayOneIndexed> => one_indexed,
            case_sensitive = "case_sensitive": Option<WeekdayCaseSensitive> => case_sensitive,
        },
        WeekNumber = "week_number" {
            padding = "padding": Option<Padding> => padding,
            repr = "repr": Option<WeekNumberRepr> => repr,
        },
        Year = "year" {
            padding = "padding": Option<Padding> => padding,
            repr = "repr": Option<YearRepr> => repr,
            base = "base": Option<YearBase> => iso_week_based,
            sign_behavior = "sign": Option<SignBehavior> => sign_is_mandatory,
        },
    }
}
| 342 | |
/// Get the target type for a given enum.
///
/// When an explicit type follows the enum name, that type is used. Otherwise the target is the
/// type of the same name in `format_description::modifier`.
macro_rules! target_ty {
    // An explicit target type was supplied.
    ($name:ident $type:ty) => {
        $type
    };
    // No explicit type: use the public modifier type with the same name.
    ($name:ident) => {
        $crate::format_description::modifier::$name
    };
}
| 352 | |
/// Get the target value for a given enum.
///
/// When an explicit expression follows the variant name, that expression is used. Otherwise the
/// target is the variant of the same name on the same-named type in
/// `format_description::modifier`.
macro_rules! target_value {
    // An explicit target value was supplied.
    ($name:ident $variant:ident $value:expr) => {
        $value
    };
    // No explicit value: use the public variant with the same name.
    ($name:ident $variant:ident) => {
        $crate::format_description::modifier::$name::$variant
    };
}
| 362 | |
/// Declare the various modifiers.
///
/// For the general case, ordinary syntax can be used. Note that you _must_ declare a default
/// variant. The only significant change is that the string representation of the variant must be
/// provided after the variant name. For example, `Numerical = b"numerical"` declares a variant
/// named `Numerical` with the string representation `b"numerical"`. This is the value that will be
/// used when parsing the modifier. The value is not case sensitive.
///
/// If the type in the public API does not have the same name as the type in the internal
/// representation, then the former must be specified in parentheses after the internal name. For
/// example, `HourBase(bool)` has an internal name "HourBase", but is represented as a boolean in
/// the public API.
///
/// By default, the internal variant name is assumed to be the same as the public variant name. If
/// this is not the case, the qualified path to the variant must be specified in parentheses after
/// the internal variant name. For example, `Twelve(true)` has an internal variant name "Twelve",
/// but is represented as `true` in the public API.
macro_rules! modifier {
    ($(
        enum $name:ident $(($target_ty:ty))? {
            $(
                $(#[$attr:meta])?
                $variant:ident $(($target_value:expr))? = $parse_variant:literal
            ),* $(,)?
        }
    )+) => {$(
        // The internal enum. `Default` is derived, so exactly one variant must carry
        // `#[default]`.
        #[derive(Default)]
        enum $name {
            $($(#[$attr])? $variant),*
        }

        impl $name {
            /// Parse the modifier from its string representation.
            fn from_modifier_value(value: &Spanned<&[u8]>) -> Result<Option<Self>, Error> {
                // One `if` per variant; the first string that matches wins.
                $(if value.eq_ignore_ascii_case($parse_variant) {
                    return Ok(Some(Self::$variant));
                })*
                Err(Error {
                    _inner: unused(value.span.error("invalid modifier value")),
                    public: crate::error::InvalidFormatDescription::InvalidModifier {
                        value: String::from_utf8_lossy(value).into_owned(),
                        index: value.span.start.byte as _,
                    },
                })
            }
        }

        // Conversion into the public representation, as selected by `target_ty!` and
        // `target_value!`.
        impl From<$name> for target_ty!($name $($target_ty)?) {
            fn from(modifier: $name) -> Self {
                match modifier {
                    $($name::$variant => target_value!($name $variant $($target_value)?)),*
                }
            }
        }
    )+};
}
| 419 | |
// Keep in alphabetical order.
//
// Each variant is written as `Variant = b"text"`, or `Variant(public_value) = b"text"` when the
// enum maps onto a different public type (given in parentheses after the enum name). The variant
// marked `#[default]` is the one produced by the derived `Default` implementation.
modifier! {
    enum HourBase(bool) {
        Twelve(true) = b"12",
        #[default]
        TwentyFour(false) = b"24",
    }

    enum MonthCaseSensitive(bool) {
        False(false) = b"false",
        #[default]
        True(true) = b"true",
    }

    enum MonthRepr {
        #[default]
        Numerical = b"numerical",
        Long = b"long",
        Short = b"short",
    }

    enum Padding {
        Space = b"space",
        #[default]
        Zero = b"zero",
        None = b"none",
    }

    enum PeriodCase(bool) {
        Lower(false) = b"lower",
        #[default]
        Upper(true) = b"upper",
    }

    enum PeriodCaseSensitive(bool) {
        False(false) = b"false",
        #[default]
        True(true) = b"true",
    }

    enum SignBehavior(bool) {
        #[default]
        Automatic(false) = b"automatic",
        Mandatory(true) = b"mandatory",
    }

    enum SubsecondDigits {
        One = b"1",
        Two = b"2",
        Three = b"3",
        Four = b"4",
        Five = b"5",
        Six = b"6",
        Seven = b"7",
        Eight = b"8",
        Nine = b"9",
        #[default]
        OneOrMore = b"1+",
    }

    enum UnixTimestampPrecision {
        #[default]
        Second = b"second",
        Millisecond = b"millisecond",
        Microsecond = b"microsecond",
        Nanosecond = b"nanosecond",
    }

    enum WeekNumberRepr {
        #[default]
        Iso = b"iso",
        Sunday = b"sunday",
        Monday = b"monday",
    }

    enum WeekdayCaseSensitive(bool) {
        False(false) = b"false",
        #[default]
        True(true) = b"true",
    }

    enum WeekdayOneIndexed(bool) {
        False(false) = b"false",
        #[default]
        True(true) = b"true",
    }

    enum WeekdayRepr {
        Short = b"short",
        #[default]
        Long = b"long",
        Sunday = b"sunday",
        Monday = b"monday",
    }

    enum YearBase(bool) {
        #[default]
        Calendar(false) = b"calendar",
        IsoWeek(true) = b"iso_week",
    }

    enum YearRepr {
        #[default]
        Full = b"full",
        Century = b"century",
        LastTwo = b"last_two",
    }
}
| 528 | |
| 529 | /// Parse a modifier value using `FromStr`. Requires the modifier value to be valid UTF-8. |
| 530 | fn parse_from_modifier_value<T: FromStr>(value: &Spanned<&[u8]>) -> Result<Option<T>, Error> { |
| 531 | str::from_utf8(value) |
| 532 | .ok() |
| 533 | .and_then(|val| val.parse::<T>().ok()) |
| 534 | .map(|val| Some(val)) |
| 535 | .ok_or_else(|| Error { |
| 536 | _inner: unused(value.span.error(message:"invalid modifier value" )), |
| 537 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
| 538 | value: String::from_utf8_lossy(value).into_owned(), |
| 539 | index: value.span.start.byte as _, |
| 540 | }, |
| 541 | }) |
| 542 | } |
| 543 | |