//! Typed, validated representation of a parsed format description.
2 | |
use alloc::boxed::Box;
use alloc::string::String;
use core::num::NonZeroU16;
use core::str::{self, FromStr};

use super::{ast, unused, Error, Span, Spanned};
9 | |
10 | /// Parse an AST iterator into a sequence of format items. |
11 | pub(super) fn parse<'a>( |
12 | ast_items: impl Iterator<Item = Result<ast::Item<'a>, Error>>, |
13 | ) -> impl Iterator<Item = Result<Item<'a>, Error>> { |
14 | ast_items.map(|ast_item: Result- , Error>
| ast_item.and_then(op:Item::from_ast)) |
15 | } |
16 | |
17 | /// A description of how to format and parse one part of a type. |
18 | pub(super) enum Item<'a> { |
19 | /// A literal string. |
20 | Literal(&'a [u8]), |
21 | /// Part of a type, along with its modifiers. |
22 | Component(Component), |
23 | /// A sequence of optional items. |
24 | Optional { |
25 | /// The items themselves. |
26 | value: Box<[Self]>, |
27 | /// The span of the full sequence. |
28 | span: Span, |
29 | }, |
30 | /// The first matching parse of a sequence of format descriptions. |
31 | First { |
32 | /// The sequence of format descriptions. |
33 | value: Box<[Box<[Self]>]>, |
34 | /// The span of the full sequence. |
35 | span: Span, |
36 | }, |
37 | } |
38 | |
39 | impl Item<'_> { |
40 | /// Parse an AST item into a format item. |
41 | pub(super) fn from_ast(ast_item: ast::Item<'_>) -> Result<Item<'_>, Error> { |
42 | Ok(match ast_item { |
43 | ast::Item::Component { |
44 | _opening_bracket: _, |
45 | _leading_whitespace: _, |
46 | name, |
47 | modifiers, |
48 | _trailing_whitespace: _, |
49 | _closing_bracket: _, |
50 | } => Item::Component(component_from_ast(&name, &modifiers)?), |
51 | ast::Item::Literal(Spanned { value, span: _ }) => Item::Literal(value), |
52 | ast::Item::EscapedBracket { |
53 | _first: _, |
54 | _second: _, |
55 | } => Item::Literal(b"[" ), |
56 | ast::Item::Optional { |
57 | opening_bracket, |
58 | _leading_whitespace: _, |
59 | _optional_kw: _, |
60 | _whitespace: _, |
61 | nested_format_description, |
62 | closing_bracket, |
63 | } => { |
64 | let items = nested_format_description |
65 | .items |
66 | .into_vec() |
67 | .into_iter() |
68 | .map(Item::from_ast) |
69 | .collect::<Result<_, _>>()?; |
70 | Item::Optional { |
71 | value: items, |
72 | span: opening_bracket.to(closing_bracket), |
73 | } |
74 | } |
75 | ast::Item::First { |
76 | opening_bracket, |
77 | _leading_whitespace: _, |
78 | _first_kw: _, |
79 | _whitespace: _, |
80 | nested_format_descriptions, |
81 | closing_bracket, |
82 | } => { |
83 | let items = nested_format_descriptions |
84 | .into_vec() |
85 | .into_iter() |
86 | .map(|nested_format_description| { |
87 | nested_format_description |
88 | .items |
89 | .into_vec() |
90 | .into_iter() |
91 | .map(Item::from_ast) |
92 | .collect() |
93 | }) |
94 | .collect::<Result<_, _>>()?; |
95 | Item::First { |
96 | value: items, |
97 | span: opening_bracket.to(closing_bracket), |
98 | } |
99 | } |
100 | }) |
101 | } |
102 | } |
103 | |
104 | impl<'a> TryFrom<Item<'a>> for crate::format_description::FormatItem<'a> { |
105 | type Error = Error; |
106 | |
107 | fn try_from(item: Item<'a>) -> Result<Self, Self::Error> { |
108 | match item { |
109 | Item::Literal(literal) => Ok(Self::Literal(literal)), |
110 | Item::Component(component) => Ok(Self::Component(component.into())), |
111 | Item::Optional { value: _, span } => Err(Error { |
112 | _inner: unused(span.error( |
113 | "optional items are not supported in runtime-parsed format descriptions" , |
114 | )), |
115 | public: crate::error::InvalidFormatDescription::NotSupported { |
116 | what: "optional item" , |
117 | context: "runtime-parsed format descriptions" , |
118 | index: span.start.byte as _, |
119 | }, |
120 | }), |
121 | Item::First { value: _, span } => Err(Error { |
122 | _inner: unused(span.error( |
123 | "'first' items are not supported in runtime-parsed format descriptions" , |
124 | )), |
125 | public: crate::error::InvalidFormatDescription::NotSupported { |
126 | what: "'first' item" , |
127 | context: "runtime-parsed format descriptions" , |
128 | index: span.start.byte as _, |
129 | }, |
130 | }), |
131 | } |
132 | } |
133 | } |
134 | |
135 | impl From<Item<'_>> for crate::format_description::OwnedFormatItem { |
136 | fn from(item: Item<'_>) -> Self { |
137 | match item { |
138 | Item::Literal(literal: &[u8]) => Self::Literal(literal.to_vec().into_boxed_slice()), |
139 | Item::Component(component: Component) => Self::Component(component.into()), |
140 | Item::Optional { value: Box<[Item<'_>]>, span: _ } => Self::Optional(Box::new(value.into())), |
141 | Item::First { value: Box<[Box<[Item<'_>]>]>, span: _ } => { |
142 | Self::First(value.into_vec().into_iter().map(Into::into).collect()) |
143 | } |
144 | } |
145 | } |
146 | } |
147 | |
148 | impl<'a> From<Box<[Item<'a>]>> for crate::format_description::OwnedFormatItem { |
149 | fn from(items: Box<[Item<'a>]>) -> Self { |
150 | let items: Vec- >
= items.into_vec(); |
151 | if items.len() == 1 { |
152 | if let Ok([item: Item<'_>]) = <[_; 1]>::try_from(items) { |
153 | item.into() |
154 | } else { |
155 | bug!("the length was just checked to be 1" ) |
156 | } |
157 | } else { |
158 | Self::Compound(items.into_iter().map(Self::from).collect()) |
159 | } |
160 | } |
161 | } |
162 | |
/// Declare the component enum, one struct per component, and the parsing glue between them.
///
/// For an invocation `enum Name { Variant = "key" { field = "modifier": Option<Type> => target } }`
/// this generates:
///
/// - `enum Name` with one tuple variant per component, each wrapping a struct of the same name;
/// - one struct per component whose fields are all `Option<Type>`;
/// - `Variant::with_modifiers`, which fills those fields from a list of AST modifiers;
/// - `From<Name> for crate::format_description::Component`, mapping each field to `target` on the
///   public modifier struct;
/// - `component_from_ast`, which selects the component by name (ASCII case-insensitively).
///
/// A field marked `#[required]` must be present among the modifiers; any other field falls back
/// to its type's `Default`. A field type marked `#[from_str]` is parsed with
/// `parse_from_modifier_value`; any other type is parsed with its own `from_modifier_value`.
macro_rules! component_definition {
    // Internal helpers: expand to the `then` tokens when the marker is present and to the `else`
    // tokens (if any) when it is absent.
    (@if_required required then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($then)* };
    (@if_required then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($($else)*)? };
    (@if_from_str from_str then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($then)* };
    (@if_from_str then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($($else)*)? };

    ($vis:vis enum $name:ident {
        $($variant:ident = $parse_variant:literal {$(
            $(#[$required:tt])?
            $field:ident = $parse_field:literal:
            Option<$(#[$from_str:tt])? $field_type:ty>
            => $target_field:ident
        ),* $(,)?}),* $(,)?
    }) => {
        $vis enum $name {
            $($variant($variant),)*
        }

        $($vis struct $variant {
            $($field: Option<$field_type>),*
        })*

        $(impl $variant {
            /// Parse the component from the AST, given its modifiers.
            fn with_modifiers(
                modifiers: &[ast::Modifier<'_>],
                _component_span: Span,
            ) -> Result<Self, Error>
            {
                let mut this = Self {
                    $($field: None),*
                };

                for modifier in modifiers {
                    // Try every known key in turn. A match stores the parsed value and moves on
                    // to the next modifier; a later occurrence of the same key overwrites it.
                    $(#[allow(clippy::string_lit_as_bytes)]
                    if modifier.key.eq_ignore_ascii_case($parse_field.as_bytes()) {
                        this.$field = component_definition!(@if_from_str $($from_str)?
                            then {
                                parse_from_modifier_value::<$field_type>(&modifier.value)?
                            } else {
                                <$field_type>::from_modifier_value(&modifier.value)?
                            });
                        continue;
                    })*
                    // No key matched.
                    return Err(Error {
                        _inner: unused(modifier.key.span.error("invalid modifier key")),
                        public: crate::error::InvalidFormatDescription::InvalidModifier {
                            value: String::from_utf8_lossy(*modifier.key).into_owned(),
                            index: modifier.key.span.start.byte as _,
                        }
                    });
                }

                // Only emitted for fields marked `#[required]`.
                $(component_definition! { @if_required $($required)? then {
                    if this.$field.is_none() {
                        return Err(Error {
                            _inner: unused(_component_span.error("missing required modifier")),
                            public:
                                crate::error::InvalidFormatDescription::MissingRequiredModifier {
                                    name: $parse_field,
                                    index: _component_span.start.byte as _,
                                }
                        });
                    }
                }})*

                Ok(this)
            }
        })*

        impl From<$name> for crate::format_description::Component {
            fn from(component: $name) -> Self {
                match component {$(
                    $name::$variant($variant { $($field),* }) => {
                        $crate::format_description::component::Component::$variant(
                            $crate::format_description::modifier::$variant {$(
                                $target_field: component_definition! { @if_required $($required)?
                                    then {
                                        // `with_modifiers` rejects a missing required modifier,
                                        // so `None` here indicates an internal error.
                                        match $field {
                                            Some(value) => value.into(),
                                            None => bug!("required modifier was not set"),
                                        }
                                    } else {
                                        $field.unwrap_or_default().into()
                                    }
                                }
                            ),*}
                        )
                    }
                )*}
            }
        }

        /// Parse a component from the AST, given its name and modifiers.
        fn component_from_ast(
            name: &Spanned<&[u8]>,
            modifiers: &[ast::Modifier<'_>],
        ) -> Result<$name, Error> {
            $(#[allow(clippy::string_lit_as_bytes)]
            if name.eq_ignore_ascii_case($parse_variant.as_bytes()) {
                return Ok($name::$variant($variant::with_modifiers(modifiers, name.span)?,));
            })*
            Err(Error {
                _inner: unused(name.span.error("invalid component")),
                public: crate::error::InvalidFormatDescription::InvalidComponentName {
                    name: String::from_utf8_lossy(name).into_owned(),
                    index: name.span.start.byte as _,
                },
            })
        }
    }
}
276 | |
277 | // Keep in alphabetical order. |
278 | component_definition! { |
279 | pub(super) enum Component { |
280 | Day = "day" { |
281 | padding = "padding" : Option<Padding> => padding, |
282 | }, |
283 | Hour = "hour" { |
284 | padding = "padding" : Option<Padding> => padding, |
285 | base = "repr" : Option<HourBase> => is_12_hour_clock, |
286 | }, |
287 | Ignore = "ignore" { |
288 | #[required] |
289 | count = "count" : Option<#[from_str] NonZeroU16> => count, |
290 | }, |
291 | Minute = "minute" { |
292 | padding = "padding" : Option<Padding> => padding, |
293 | }, |
294 | Month = "month" { |
295 | padding = "padding" : Option<Padding> => padding, |
296 | repr = "repr" : Option<MonthRepr> => repr, |
297 | case_sensitive = "case_sensitive" : Option<MonthCaseSensitive> => case_sensitive, |
298 | }, |
299 | OffsetHour = "offset_hour" { |
300 | sign_behavior = "sign" : Option<SignBehavior> => sign_is_mandatory, |
301 | padding = "padding" : Option<Padding> => padding, |
302 | }, |
303 | OffsetMinute = "offset_minute" { |
304 | padding = "padding" : Option<Padding> => padding, |
305 | }, |
306 | OffsetSecond = "offset_second" { |
307 | padding = "padding" : Option<Padding> => padding, |
308 | }, |
309 | Ordinal = "ordinal" { |
310 | padding = "padding" : Option<Padding> => padding, |
311 | }, |
312 | Period = "period" { |
313 | case = "case" : Option<PeriodCase> => is_uppercase, |
314 | case_sensitive = "case_sensitive" : Option<PeriodCaseSensitive> => case_sensitive, |
315 | }, |
316 | Second = "second" { |
317 | padding = "padding" : Option<Padding> => padding, |
318 | }, |
319 | Subsecond = "subsecond" { |
320 | digits = "digits" : Option<SubsecondDigits> => digits, |
321 | }, |
322 | UnixTimestamp = "unix_timestamp" { |
323 | precision = "precision" : Option<UnixTimestampPrecision> => precision, |
324 | sign_behavior = "sign" : Option<SignBehavior> => sign_is_mandatory, |
325 | }, |
326 | Weekday = "weekday" { |
327 | repr = "repr" : Option<WeekdayRepr> => repr, |
328 | one_indexed = "one_indexed" : Option<WeekdayOneIndexed> => one_indexed, |
329 | case_sensitive = "case_sensitive" : Option<WeekdayCaseSensitive> => case_sensitive, |
330 | }, |
331 | WeekNumber = "week_number" { |
332 | padding = "padding" : Option<Padding> => padding, |
333 | repr = "repr" : Option<WeekNumberRepr> => repr, |
334 | }, |
335 | Year = "year" { |
336 | padding = "padding" : Option<Padding> => padding, |
337 | repr = "repr" : Option<YearRepr> => repr, |
338 | base = "base" : Option<YearBase> => iso_week_based, |
339 | sign_behavior = "sign" : Option<SignBehavior> => sign_is_mandatory, |
340 | }, |
341 | } |
342 | } |
343 | |
/// Resolve the public type that an internal modifier enum converts into.
///
/// When an explicit type follows the internal name, that type is used. Otherwise the type of the
/// same name in `format_description::modifier` is used.
macro_rules! target_ty {
    ($internal:ident $public:ty) => {
        $public
    };
    ($internal:ident) => {
        $crate::format_description::modifier::$internal
    };
}
353 | |
/// Resolve the public value that a variant of an internal modifier enum converts into.
///
/// When an explicit expression follows the variant name, that expression is used. Otherwise the
/// variant of the same name on the same-named type in `format_description::modifier` is used.
macro_rules! target_value {
    ($internal:ident $variant:ident $public:expr) => {
        $public
    };
    ($internal:ident $variant:ident) => {
        $crate::format_description::modifier::$internal::$variant
    };
}
363 | |
/// Declare the modifier enums together with their parsing and conversion code.
///
/// The syntax is that of an ordinary enum, with two additions. Exactly one variant _must_ be
/// marked `#[default]`, and every variant is followed by the byte-string it is parsed from. For
/// example, `Numerical = b"numerical"` declares a variant `Numerical` that is selected by the
/// modifier value `numerical`. Values are matched without regard to ASCII case.
///
/// If the public API type has a different name than the internal enum, it is given in
/// parentheses after the enum name. `HourBase(bool)`, for instance, is called "HourBase"
/// internally but is a plain boolean in the public API.
///
/// Likewise, a variant maps to the public variant of the same name unless a replacement
/// expression is given in parentheses after the variant name. `Twelve(true)` is called "Twelve"
/// internally but becomes `true` in the public API.
macro_rules! modifier {
    ($(
        enum $name:ident $(($target_ty:ty))? {
            $(
                $(#[$variant_attr:meta])?
                $variant:ident $(($target_value:expr))? = $text:literal
            ),* $(,)?
        }
    )+) => {$(
        #[derive(Default)]
        enum $name {
            $($(#[$variant_attr])? $variant),*
        }

        impl $name {
            /// Parse the modifier from its string representation.
            fn from_modifier_value(value: &Spanned<&[u8]>) -> Result<Option<Self>, Error> {
                // Compare against each variant's text in declaration order.
                $(if value.eq_ignore_ascii_case($text) {
                    return Ok(Some(Self::$variant));
                })*
                Err(Error {
                    _inner: unused(value.span.error("invalid modifier value")),
                    public: crate::error::InvalidFormatDescription::InvalidModifier {
                        value: String::from_utf8_lossy(value).into_owned(),
                        index: value.span.start.byte as _,
                    },
                })
            }
        }

        impl From<$name> for target_ty!($name $($target_ty)?) {
            fn from(internal: $name) -> Self {
                match internal {
                    $($name::$variant => target_value!($name $variant $($target_value)?)),*
                }
            }
        }
    )+};
}
420 | |
421 | // Keep in alphabetical order. |
422 | modifier! { |
423 | enum HourBase(bool) { |
424 | Twelve(true) = b"12" , |
425 | #[default] |
426 | TwentyFour(false) = b"24" , |
427 | } |
428 | |
429 | enum MonthCaseSensitive(bool) { |
430 | False(false) = b"false" , |
431 | #[default] |
432 | True(true) = b"true" , |
433 | } |
434 | |
435 | enum MonthRepr { |
436 | #[default] |
437 | Numerical = b"numerical" , |
438 | Long = b"long" , |
439 | Short = b"short" , |
440 | } |
441 | |
442 | enum Padding { |
443 | Space = b"space" , |
444 | #[default] |
445 | Zero = b"zero" , |
446 | None = b"none" , |
447 | } |
448 | |
449 | enum PeriodCase(bool) { |
450 | Lower(false) = b"lower" , |
451 | #[default] |
452 | Upper(true) = b"upper" , |
453 | } |
454 | |
455 | enum PeriodCaseSensitive(bool) { |
456 | False(false) = b"false" , |
457 | #[default] |
458 | True(true) = b"true" , |
459 | } |
460 | |
461 | enum SignBehavior(bool) { |
462 | #[default] |
463 | Automatic(false) = b"automatic" , |
464 | Mandatory(true) = b"mandatory" , |
465 | } |
466 | |
467 | enum SubsecondDigits { |
468 | One = b"1" , |
469 | Two = b"2" , |
470 | Three = b"3" , |
471 | Four = b"4" , |
472 | Five = b"5" , |
473 | Six = b"6" , |
474 | Seven = b"7" , |
475 | Eight = b"8" , |
476 | Nine = b"9" , |
477 | #[default] |
478 | OneOrMore = b"1+" , |
479 | } |
480 | |
481 | enum UnixTimestampPrecision { |
482 | #[default] |
483 | Second = b"second" , |
484 | Millisecond = b"millisecond" , |
485 | Microsecond = b"microsecond" , |
486 | Nanosecond = b"nanosecond" , |
487 | } |
488 | |
489 | enum WeekNumberRepr { |
490 | #[default] |
491 | Iso = b"iso" , |
492 | Sunday = b"sunday" , |
493 | Monday = b"monday" , |
494 | } |
495 | |
496 | enum WeekdayCaseSensitive(bool) { |
497 | False(false) = b"false" , |
498 | #[default] |
499 | True(true) = b"true" , |
500 | } |
501 | |
502 | enum WeekdayOneIndexed(bool) { |
503 | False(false) = b"false" , |
504 | #[default] |
505 | True(true) = b"true" , |
506 | } |
507 | |
508 | enum WeekdayRepr { |
509 | Short = b"short" , |
510 | #[default] |
511 | Long = b"long" , |
512 | Sunday = b"sunday" , |
513 | Monday = b"monday" , |
514 | } |
515 | |
516 | enum YearBase(bool) { |
517 | #[default] |
518 | Calendar(false) = b"calendar" , |
519 | IsoWeek(true) = b"iso_week" , |
520 | } |
521 | |
522 | enum YearRepr { |
523 | #[default] |
524 | Full = b"full" , |
525 | LastTwo = b"last_two" , |
526 | } |
527 | } |
528 | |
529 | /// Parse a modifier value using `FromStr`. Requires the modifier value to be valid UTF-8. |
530 | fn parse_from_modifier_value<T: FromStr>(value: &Spanned<&[u8]>) -> Result<Option<T>, Error> { |
531 | str::from_utf8(value) |
532 | .ok() |
533 | .and_then(|val| val.parse::<T>().ok()) |
534 | .map(|val| Some(val)) |
535 | .ok_or_else(|| Error { |
536 | _inner: unused(value.span.error(message:"invalid modifier value" )), |
537 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
538 | value: String::from_utf8_lossy(value).into_owned(), |
539 | index: value.span.start.byte as _, |
540 | }, |
541 | }) |
542 | } |
543 | |