1 | //! Typed, validated representation of a parsed format description. |
2 | |
3 | use alloc::boxed::Box; |
4 | use alloc::string::String; |
5 | use core::num::NonZeroU16; |
6 | use core::str::{self, FromStr}; |
7 | |
8 | use super::{ast, unused, Error, Span, Spanned}; |
9 | use crate::internal_macros::bug; |
10 | |
11 | /// Parse an AST iterator into a sequence of format items. |
12 | pub(super) fn parse<'a>( |
13 | ast_items: impl Iterator<Item = Result<ast::Item<'a>, Error>>, |
14 | ) -> impl Iterator<Item = Result<Item<'a>, Error>> { |
15 | ast_items.map(|ast_item: Result- , Error>
| ast_item.and_then(op:Item::from_ast)) |
16 | } |
17 | |
/// A description of how to format and parse one part of a type.
///
/// Values of this type are produced from AST items by `Item::from_ast`.
pub(super) enum Item<'a> {
    /// A literal string, kept as raw bytes borrowed for `'a`.
    Literal(&'a [u8]),
    /// Part of a type, along with its modifiers.
    Component(Component),
    /// A sequence of optional items.
    Optional {
        /// The items themselves.
        value: Box<[Self]>,
        /// The span of the full sequence (opening bracket through closing bracket).
        span: Span,
    },
    /// The first matching parse of a sequence of format descriptions.
    First {
        /// The sequence of format descriptions; each inner slice is one alternative.
        value: Box<[Box<[Self]>]>,
        /// The span of the full sequence (opening bracket through closing bracket).
        span: Span,
    },
}
39 | |
40 | impl Item<'_> { |
41 | /// Parse an AST item into a format item. |
42 | pub(super) fn from_ast(ast_item: ast::Item<'_>) -> Result<Item<'_>, Error> { |
43 | Ok(match ast_item { |
44 | ast::Item::Component { |
45 | _opening_bracket: _, |
46 | _leading_whitespace: _, |
47 | name, |
48 | modifiers, |
49 | _trailing_whitespace: _, |
50 | _closing_bracket: _, |
51 | } => Item::Component(component_from_ast(&name, &modifiers)?), |
52 | ast::Item::Literal(Spanned { value, span: _ }) => Item::Literal(value), |
53 | ast::Item::EscapedBracket { |
54 | _first: _, |
55 | _second: _, |
56 | } => Item::Literal(b"[" ), |
57 | ast::Item::Optional { |
58 | opening_bracket, |
59 | _leading_whitespace: _, |
60 | _optional_kw: _, |
61 | _whitespace: _, |
62 | nested_format_description, |
63 | closing_bracket, |
64 | } => { |
65 | let items = nested_format_description |
66 | .items |
67 | .into_vec() |
68 | .into_iter() |
69 | .map(Item::from_ast) |
70 | .collect::<Result<_, _>>()?; |
71 | Item::Optional { |
72 | value: items, |
73 | span: opening_bracket.to(closing_bracket), |
74 | } |
75 | } |
76 | ast::Item::First { |
77 | opening_bracket, |
78 | _leading_whitespace: _, |
79 | _first_kw: _, |
80 | _whitespace: _, |
81 | nested_format_descriptions, |
82 | closing_bracket, |
83 | } => { |
84 | let items = nested_format_descriptions |
85 | .into_vec() |
86 | .into_iter() |
87 | .map(|nested_format_description| { |
88 | nested_format_description |
89 | .items |
90 | .into_vec() |
91 | .into_iter() |
92 | .map(Item::from_ast) |
93 | .collect() |
94 | }) |
95 | .collect::<Result<_, _>>()?; |
96 | Item::First { |
97 | value: items, |
98 | span: opening_bracket.to(closing_bracket), |
99 | } |
100 | } |
101 | }) |
102 | } |
103 | } |
104 | |
105 | impl<'a> TryFrom<Item<'a>> for crate::format_description::BorrowedFormatItem<'a> { |
106 | type Error = Error; |
107 | |
108 | fn try_from(item: Item<'a>) -> Result<Self, Self::Error> { |
109 | match item { |
110 | Item::Literal(literal) => Ok(Self::Literal(literal)), |
111 | Item::Component(component) => Ok(Self::Component(component.into())), |
112 | Item::Optional { value: _, span } => Err(Error { |
113 | _inner: unused(span.error( |
114 | "optional items are not supported in runtime-parsed format descriptions" , |
115 | )), |
116 | public: crate::error::InvalidFormatDescription::NotSupported { |
117 | what: "optional item" , |
118 | context: "runtime-parsed format descriptions" , |
119 | index: span.start.byte as _, |
120 | }, |
121 | }), |
122 | Item::First { value: _, span } => Err(Error { |
123 | _inner: unused(span.error( |
124 | "'first' items are not supported in runtime-parsed format descriptions" , |
125 | )), |
126 | public: crate::error::InvalidFormatDescription::NotSupported { |
127 | what: "'first' item" , |
128 | context: "runtime-parsed format descriptions" , |
129 | index: span.start.byte as _, |
130 | }, |
131 | }), |
132 | } |
133 | } |
134 | } |
135 | |
136 | impl From<Item<'_>> for crate::format_description::OwnedFormatItem { |
137 | fn from(item: Item<'_>) -> Self { |
138 | match item { |
139 | Item::Literal(literal: &[u8]) => Self::Literal(literal.to_vec().into_boxed_slice()), |
140 | Item::Component(component: Component) => Self::Component(component.into()), |
141 | Item::Optional { value: Box<[Item<'_>]>, span: _ } => Self::Optional(Box::new(value.into())), |
142 | Item::First { value: Box<[Box<[Item<'_>]>]>, span: _ } => { |
143 | Self::First(value.into_vec().into_iter().map(Into::into).collect()) |
144 | } |
145 | } |
146 | } |
147 | } |
148 | |
149 | impl<'a> From<Box<[Item<'a>]>> for crate::format_description::OwnedFormatItem { |
150 | fn from(items: Box<[Item<'a>]>) -> Self { |
151 | let items: Vec- >
= items.into_vec(); |
152 | match <[_; 1]>::try_from(items) { |
153 | Ok([item: Item<'a>]) => item.into(), |
154 | Err(vec: Vec- >
) => Self::Compound(vec.into_iter().map(Into::into).collect()), |
155 | } |
156 | } |
157 | } |
158 | |
/// Declare the `Component` enum, one struct per variant, and the code that parses and converts
/// them.
///
/// Each variant is written as `Variant = "name" { field = "key": Option<Type> => target, ... }`:
///
/// - `"name"` is the component name and `"key"` the modifier key; both are matched ASCII
///   case-insensitively.
/// - `Type` is the internal type of the modifier. Prefix it with `#[from_str]` to parse the value
///   with `parse_from_modifier_value` instead of `Type::from_modifier_value`.
/// - `target` is the name of the field in the corresponding `format_description::modifier`
///   struct.
/// - Prefix a field with `#[required]` to make a missing modifier an error. Other fields fall
///   back to the `Default` of `Type`.
///
/// Besides the types, this generates `with_modifiers` on every variant struct, a `From`
/// conversion into `crate::format_description::Component`, and the free function
/// `component_from_ast`.
macro_rules! component_definition {
    // Internal helpers: expand to the `then` tokens when the marker (`required` / `from_str`) is
    // present, and to the optional `else` tokens (or nothing) when it is absent.
    (@if_required required then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($then)* };
    (@if_required then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($($else)*)? };
    (@if_from_str from_str then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($then)* };
    (@if_from_str then { $($then:tt)* } $(else { $($else:tt)* })?) => { $($($else)*)? };

    // Main entry point.
    ($vis:vis enum $name:ident {
        $($variant:ident = $parse_variant:literal {$(
            $(#[$required:tt])?
            $field:ident = $parse_field:literal:
            Option<$(#[$from_str:tt])? $field_type:ty>
            => $target_field:ident
        ),* $(,)?}),* $(,)?
    }) => {
        // The enum wraps one struct per variant; the struct shares the variant's name.
        $vis enum $name {
            $($variant($variant),)*
        }

        // Every field is optional at this stage; `None` means the modifier was not given.
        $($vis struct $variant {
            $($field: Option<$field_type>),*
        })*

        $(impl $variant {
            /// Parse the component from the AST, given its modifiers.
            fn with_modifiers(
                modifiers: &[ast::Modifier<'_>],
                _component_span: Span,
            ) -> Result<Self, Error>
            {
                // rustc will complain if the modifier is empty.
                #[allow(unused_mut)]
                let mut this = Self {
                    $($field: None),*
                };

                for modifier in modifiers {
                    // Try each known key in declaration order. A key given more than once simply
                    // overwrites the value stored earlier.
                    $(#[allow(clippy::string_lit_as_bytes)]
                    if modifier.key.eq_ignore_ascii_case($parse_field.as_bytes()) {
                        this.$field = component_definition!(@if_from_str $($from_str)?
                            then {
                                parse_from_modifier_value::<$field_type>(&modifier.value)?
                            } else {
                                <$field_type>::from_modifier_value(&modifier.value)?
                            });
                        continue;
                    })*
                    // No key matched: the modifier is not valid for this component.
                    return Err(Error {
                        _inner: unused(modifier.key.span.error("invalid modifier key" )),
                        public: crate::error::InvalidFormatDescription::InvalidModifier {
                            value: String::from_utf8_lossy(*modifier.key).into_owned(),
                            index: modifier.key.span.start.byte as _,
                        }
                    });
                }

                // Only fields marked `#[required]` get a presence check; for all other fields
                // this expands to nothing.
                $(component_definition! { @if_required $($required)? then {
                    if this.$field.is_none() {
                        return Err(Error {
                            _inner: unused(_component_span.error("missing required modifier" )),
                            public:
                                crate::error::InvalidFormatDescription::MissingRequiredModifier {
                                    name: $parse_field,
                                    index: _component_span.start.byte as _,
                                }
                        });
                    }
                }})*

                Ok(this)
            }
        })*

        impl From<$name> for crate::format_description::Component {
            fn from(component: $name) -> Self {
                match component {$(
                    $name::$variant($variant { $($field),* }) => {
                        $crate::format_description::component::Component::$variant(
                            $crate::format_description::modifier::$variant {$(
                                // Required fields were checked in `with_modifiers`, so `None`
                                // here is treated as a bug; optional fields fall back to the
                                // default of the internal type.
                                $target_field: component_definition! { @if_required $($required)?
                                    then {
                                        match $field {
                                            Some(value) => value.into(),
                                            None => bug!("required modifier was not set" ),
                                        }
                                    } else {
                                        $field.unwrap_or_default().into()
                                    }
                                }
                            ),*}
                        )
                    }
                )*}
            }
        }

        /// Parse a component from the AST, given its name and modifiers.
        fn component_from_ast(
            name: &Spanned<&[u8]>,
            modifiers: &[ast::Modifier<'_>],
        ) -> Result<Component, Error> {
            // Compare against every declared component name in declaration order.
            $(#[allow(clippy::string_lit_as_bytes)]
            if name.eq_ignore_ascii_case($parse_variant.as_bytes()) {
                return Ok(Component::$variant($variant::with_modifiers(&modifiers, name.span)?));
            })*
            Err(Error {
                _inner: unused(name.span.error("invalid component" )),
                public: crate::error::InvalidFormatDescription::InvalidComponentName {
                    name: String::from_utf8_lossy(name).into_owned(),
                    index: name.span.start.byte as _,
                },
            })
        }
    }
}
274 | |
// Every row reads `field = "modifier key": Option<InternalType> => target_field`, where
// `target_field` is the field name used when converting to the `format_description::modifier`
// struct of the same name as the variant. `#[required]` makes the modifier mandatory, and
// `#[from_str]` parses the value with `FromStr` rather than `from_modifier_value`.
//
// Keep in alphabetical order.
component_definition! {
    pub(super) enum Component {
        Day = "day" {
            padding = "padding" : Option<Padding> => padding,
        },
        End = "end" {},
        Hour = "hour" {
            padding = "padding" : Option<Padding> => padding,
            base = "repr" : Option<HourBase> => is_12_hour_clock,
        },
        Ignore = "ignore" {
            #[required]
            count = "count" : Option<#[from_str] NonZeroU16> => count,
        },
        Minute = "minute" {
            padding = "padding" : Option<Padding> => padding,
        },
        Month = "month" {
            padding = "padding" : Option<Padding> => padding,
            repr = "repr" : Option<MonthRepr> => repr,
            case_sensitive = "case_sensitive" : Option<MonthCaseSensitive> => case_sensitive,
        },
        OffsetHour = "offset_hour" {
            sign_behavior = "sign" : Option<SignBehavior> => sign_is_mandatory,
            padding = "padding" : Option<Padding> => padding,
        },
        OffsetMinute = "offset_minute" {
            padding = "padding" : Option<Padding> => padding,
        },
        OffsetSecond = "offset_second" {
            padding = "padding" : Option<Padding> => padding,
        },
        Ordinal = "ordinal" {
            padding = "padding" : Option<Padding> => padding,
        },
        Period = "period" {
            case = "case" : Option<PeriodCase> => is_uppercase,
            case_sensitive = "case_sensitive" : Option<PeriodCaseSensitive> => case_sensitive,
        },
        Second = "second" {
            padding = "padding" : Option<Padding> => padding,
        },
        Subsecond = "subsecond" {
            digits = "digits" : Option<SubsecondDigits> => digits,
        },
        UnixTimestamp = "unix_timestamp" {
            precision = "precision" : Option<UnixTimestampPrecision> => precision,
            sign_behavior = "sign" : Option<SignBehavior> => sign_is_mandatory,
        },
        Weekday = "weekday" {
            repr = "repr" : Option<WeekdayRepr> => repr,
            one_indexed = "one_indexed" : Option<WeekdayOneIndexed> => one_indexed,
            case_sensitive = "case_sensitive" : Option<WeekdayCaseSensitive> => case_sensitive,
        },
        WeekNumber = "week_number" {
            padding = "padding" : Option<Padding> => padding,
            repr = "repr" : Option<WeekNumberRepr> => repr,
        },
        Year = "year" {
            padding = "padding" : Option<Padding> => padding,
            repr = "repr" : Option<YearRepr> => repr,
            base = "base" : Option<YearBase> => iso_week_based,
            sign_behavior = "sign" : Option<SignBehavior> => sign_is_mandatory,
        },
    }
}
342 | |
/// Resolve the public type that an internal modifier enum converts into.
///
/// With only the internal name, the result is the type of the same name in
/// `format_description::modifier`. When an explicit type follows the name, that type is used
/// instead.
macro_rules! target_ty {
    ($internal:ident) => {
        $crate::format_description::modifier::$internal
    };
    ($internal:ident $public:ty) => {
        $public
    };
}
352 | |
/// Resolve the public value that an internal modifier variant converts into.
///
/// With only the enum and variant names, the result is the variant of the same name on the
/// matching `format_description::modifier` type. When an explicit expression follows, that
/// expression is used instead.
macro_rules! target_value {
    ($internal:ident $case:ident) => {
        $crate::format_description::modifier::$internal::$case
    };
    ($internal:ident $case:ident $public:expr) => {
        $public
    };
}
362 | |
/// Declare the various modifiers.
///
/// For the general case, ordinary syntax can be used. Note that you _must_ declare a default
/// variant. The only significant change is that the string representation of the variant must be
/// provided after the variant name. For example, `Numerical = b"numerical"` declares a variant
/// named `Numerical` with the string representation `b"numerical"`. This is the value that will be
/// used when parsing the modifier. The value is not case sensitive.
///
/// If the type in the public API does not have the same name as the type in the internal
/// representation, then the former must be specified in parentheses after the internal name. For
/// example, `HourBase(bool)` has an internal name "HourBase", but is represented as a boolean in
/// the public API.
///
/// By default, the internal variant name is assumed to be the same as the public variant name. If
/// this is not the case, the value to use in the public API must be specified in parentheses
/// after the internal variant name. For example, `Twelve(true)` has an internal variant name
/// "Twelve", but is represented as `true` in the public API.
///
/// For every enum this generates the enum itself (deriving `Default`), an associated
/// `from_modifier_value` function, and a `From` conversion into the public type.
macro_rules! modifier {
    ($(
        enum $name:ident $(($target_ty:ty))? {
            $(
                $(#[$attr:meta])?
                $variant:ident $(($target_value:expr))? = $parse_variant:literal
            ),* $(,)?
        }
    )+) => {$(
        // `Default` is derived, hence the requirement that one variant carries `#[default]`.
        #[derive(Default)]
        enum $name {
            $($(#[$attr])? $variant),*
        }

        impl $name {
            /// Parse the modifier from its string representation.
            fn from_modifier_value(value: &Spanned<&[u8]>) -> Result<Option<Self>, Error> {
                // Variants are tried in declaration order, ignoring ASCII case.
                $(if value.eq_ignore_ascii_case($parse_variant) {
                    return Ok(Some(Self::$variant));
                })*
                // Nothing matched: report the offending value and where it starts.
                Err(Error {
                    _inner: unused(value.span.error("invalid modifier value" )),
                    public: crate::error::InvalidFormatDescription::InvalidModifier {
                        value: String::from_utf8_lossy(value).into_owned(),
                        index: value.span.start.byte as _,
                    },
                })
            }
        }

        // `target_ty!` / `target_value!` pick the explicit public type or value when one was
        // given in parentheses, and the same-named public item otherwise.
        impl From<$name> for target_ty!($name $($target_ty)?) {
            fn from(modifier: $name) -> Self {
                match modifier {
                    $($name::$variant => target_value!($name $variant $($target_value)?)),*
                }
            }
        }
    )+};
}
419 | |
// Each variant lists the byte string accepted when parsing (compared ignoring ASCII case). The
// variant marked `#[default]` is the one the derived `Default` yields, which is what an optional
// modifier falls back to when it is not given. A parenthesized type or value names the public
// representation when it differs from the internal one.
//
// Keep in alphabetical order.
modifier! {
    enum HourBase(bool) {
        Twelve(true) = b"12" ,
        #[default]
        TwentyFour(false) = b"24" ,
    }

    enum MonthCaseSensitive(bool) {
        False(false) = b"false" ,
        #[default]
        True(true) = b"true" ,
    }

    enum MonthRepr {
        #[default]
        Numerical = b"numerical" ,
        Long = b"long" ,
        Short = b"short" ,
    }

    enum Padding {
        Space = b"space" ,
        #[default]
        Zero = b"zero" ,
        None = b"none" ,
    }

    enum PeriodCase(bool) {
        Lower(false) = b"lower" ,
        #[default]
        Upper(true) = b"upper" ,
    }

    enum PeriodCaseSensitive(bool) {
        False(false) = b"false" ,
        #[default]
        True(true) = b"true" ,
    }

    enum SignBehavior(bool) {
        #[default]
        Automatic(false) = b"automatic" ,
        Mandatory(true) = b"mandatory" ,
    }

    enum SubsecondDigits {
        One = b"1" ,
        Two = b"2" ,
        Three = b"3" ,
        Four = b"4" ,
        Five = b"5" ,
        Six = b"6" ,
        Seven = b"7" ,
        Eight = b"8" ,
        Nine = b"9" ,
        #[default]
        OneOrMore = b"1+" ,
    }

    enum UnixTimestampPrecision {
        #[default]
        Second = b"second" ,
        Millisecond = b"millisecond" ,
        Microsecond = b"microsecond" ,
        Nanosecond = b"nanosecond" ,
    }

    enum WeekNumberRepr {
        #[default]
        Iso = b"iso" ,
        Sunday = b"sunday" ,
        Monday = b"monday" ,
    }

    enum WeekdayCaseSensitive(bool) {
        False(false) = b"false" ,
        #[default]
        True(true) = b"true" ,
    }

    enum WeekdayOneIndexed(bool) {
        False(false) = b"false" ,
        #[default]
        True(true) = b"true" ,
    }

    enum WeekdayRepr {
        Short = b"short" ,
        #[default]
        Long = b"long" ,
        Sunday = b"sunday" ,
        Monday = b"monday" ,
    }

    enum YearBase(bool) {
        #[default]
        Calendar(false) = b"calendar" ,
        IsoWeek(true) = b"iso_week" ,
    }

    enum YearRepr {
        #[default]
        Full = b"full" ,
        Century = b"century" ,
        LastTwo = b"last_two" ,
    }
}
528 | |
529 | /// Parse a modifier value using `FromStr`. Requires the modifier value to be valid UTF-8. |
530 | fn parse_from_modifier_value<T: FromStr>(value: &Spanned<&[u8]>) -> Result<Option<T>, Error> { |
531 | str::from_utf8(value) |
532 | .ok() |
533 | .and_then(|val| val.parse::<T>().ok()) |
534 | .map(|val| Some(val)) |
535 | .ok_or_else(|| Error { |
536 | _inner: unused(value.span.error(message:"invalid modifier value" )), |
537 | public: crate::error::InvalidFormatDescription::InvalidModifier { |
538 | value: String::from_utf8_lossy(value).into_owned(), |
539 | index: value.span.start.byte as _, |
540 | }, |
541 | }) |
542 | } |
543 | |