| 1 | /*! |
| 2 | This module provides parsing facilities for [RFC 9557] extensions to |
| 3 | [RFC 3339]. |
| 4 | |
| 5 | This only provides internal helper routines that can be used in other parsers. |
| 6 | Namely, RFC 9557 is just a backward compatible expansion to RFC 3339. |
| 7 | |
| 8 | The parser in this module checks for full syntactic validity of the annotation |
| 9 | syntax defined in RFC 9557. However, Jiff doesn't make use of any of these |
| 10 | annotations except for time zone annotations. So for example, |
| 11 | `2024-05-25T13:33:00-05[America/New_York][foo=bar]` is valid, but the parser |
| 12 | will only expose the `America/New_York` annotation. |
| 13 | |
| 14 | Note though that even for things that are ignored, validity |
| 15 | and criticality are still respected. So for example, |
| 16 | `2024-05-25T13:33:00-05[America/New_York][!foo=bar]` will fail to parse because |
| 17 | of the `!` indicating that consumers must take action on the annotation, |
| 18 | including by returning an error if it isn't supported. |
| 19 | |
| 20 | [RFC 3339]: https://www.rfc-editor.org/rfc/rfc3339 |
| 21 | [RFC 9557]: https://www.rfc-editor.org/rfc/rfc9557.html |
| 22 | */ |
| 23 | |
| 24 | // Here's the specific part of Temporal's grammar that is implemented below |
| 25 | // (which should match what's in RFC 9557): |
| 26 | // |
| 27 | // TimeZoneAnnotation ::: |
| 28 | // [ AnnotationCriticalFlag[opt] TimeZoneIdentifier ] |
| 29 | // |
| 30 | // Annotations ::: |
| 31 | // Annotation Annotations[opt] |
| 32 | // |
| 33 | // AnnotationCriticalFlag ::: |
| 34 | // ! |
| 35 | // |
| 36 | // TimeZoneIdentifier ::: |
| 37 | // TimeZoneUTCOffsetName |
| 38 | // TimeZoneIANAName |
| 39 | // |
| 40 | // TimeZoneIANAName ::: |
| 41 | // TimeZoneIANANameComponent |
| 42 | // TimeZoneIANAName / TimeZoneIANANameComponent |
| 43 | // |
| 44 | // TimeZoneIANANameComponent ::: |
| 45 | // TZLeadingChar |
| 46 | // TimeZoneIANANameComponent TZChar |
| 47 | // |
| 48 | // Annotation ::: |
| 49 | // [ AnnotationCriticalFlag[opt] AnnotationKey = AnnotationValue ] |
| 50 | // |
| 51 | // AnnotationKey ::: |
| 52 | // AKeyLeadingChar |
| 53 | // AnnotationKey AKeyChar |
| 54 | // |
| 55 | // AnnotationValue ::: |
| 56 | // AnnotationValueComponent |
| 57 | // AnnotationValueComponent - AnnotationValue |
| 58 | // |
| 59 | // AnnotationValueComponent ::: |
| 60 | // Alpha AnnotationValueComponent[opt] |
| 61 | // DecimalDigit AnnotationValueComponent[opt] |
| 62 | // |
| 63 | // AKeyLeadingChar ::: |
| 64 | // LowercaseAlpha |
| 65 | // _ |
| 66 | // |
| 67 | // AKeyChar ::: |
| 68 | // AKeyLeadingChar |
| 69 | // DecimalDigit |
| 70 | // - |
| 71 | // |
| 72 | // TZLeadingChar ::: |
| 73 | // Alpha |
| 74 | // . |
| 75 | // _ |
| 76 | // |
| 77 | // TZChar ::: |
| 78 | // TZLeadingChar |
| 79 | // DecimalDigit |
| 80 | // - |
| 81 | // + |
| 82 | // |
| 83 | // DecimalDigit :: one of |
| 84 | // 0 1 2 3 4 5 6 7 8 9 |
| 85 | // |
| 86 | // Alpha ::: one of |
| 87 | // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z |
| 88 | // a b c d e f g h i j k l m n o p q r s t u v w x y z |
| 89 | // |
| 90 | // LowercaseAlpha ::: one of |
| 91 | // a b c d e f g h i j k l m n o p q r s t u v w x y z |
| 92 | // |
| 93 | // # N.B. This is handled by src/format/offset.rs, so we don't expand it here. |
| 94 | // TimeZoneUTCOffsetName ::: |
| 95 | // UTCOffsetMinutePrecision |
| 96 | |
| 97 | use crate::{ |
| 98 | error::{err, Error}, |
| 99 | fmt::{ |
| 100 | offset::{self, ParsedOffset}, |
| 101 | temporal::{TimeZoneAnnotation, TimeZoneAnnotationKind}, |
| 102 | Parsed, |
| 103 | }, |
| 104 | util::{escape, parse}, |
| 105 | }; |
| 106 | |
| 107 | /// The result of parsing RFC 9557 annotations. |
| 108 | /// |
| 109 | /// Currently, this only provides access to a parsed time zone annotation, if |
| 110 | /// present. While the parser does validate all other key/value annotations, |
| 111 | /// Jiff doesn't make use of them and thus does not expose them here. They are |
| 112 | /// only validated at a syntax level. |
| 113 | #[derive (Debug)] |
| 114 | pub(crate) struct ParsedAnnotations<'i> { |
| 115 | /// The original input that all of the annotations were parsed from. |
| 116 | /// |
| 117 | /// N.B. This is currently unused, but potentially useful, so we leave it. |
| 118 | #[allow (dead_code)] |
| 119 | input: escape::Bytes<'i>, |
| 120 | /// An optional time zone annotation that was extracted from the input. |
| 121 | time_zone: Option<ParsedTimeZone<'i>>, |
| 122 | // While we parse/validate them, we don't support any other annotations |
| 123 | // at time of writing. Temporal supports calendar annotations, but I'm |
| 124 | // not sure Jiff will ever go down that route. |
| 125 | } |
| 126 | |
| 127 | impl<'i> ParsedAnnotations<'i> { |
| 128 | /// Return an empty parsed annotations. |
| 129 | pub(crate) fn none() -> ParsedAnnotations<'static> { |
| 130 | ParsedAnnotations { input: escape::Bytes(&[]), time_zone: None } |
| 131 | } |
| 132 | |
| 133 | /// Turns this parsed time zone into a structured time zone annotation, |
| 134 | /// if an annotation was found. Otherwise, returns `Ok(None)`. |
| 135 | /// |
| 136 | /// This can return an error if the parsed offset could not be converted |
| 137 | /// to a `crate::tz::Offset`. |
| 138 | pub(crate) fn to_time_zone_annotation( |
| 139 | &self, |
| 140 | ) -> Result<Option<TimeZoneAnnotation<'i>>, Error> { |
| 141 | let Some(ref parsed: &ParsedTimeZone<'i>) = self.time_zone else { return Ok(None) }; |
| 142 | Ok(Some(parsed.to_time_zone_annotation()?)) |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | /// The result of parsing a time zone annotation. |
| 147 | #[derive (Debug)] |
| 148 | enum ParsedTimeZone<'i> { |
| 149 | /// The name of an IANA time zone was found. |
| 150 | Named { |
| 151 | /// Whether the critical flag was seen. |
| 152 | critical: bool, |
| 153 | /// The parsed name. |
| 154 | name: &'i str, |
| 155 | }, |
| 156 | /// A specific UTC numeric offset was found. |
| 157 | Offset { |
| 158 | /// Whether the critical flag was seen. |
| 159 | critical: bool, |
| 160 | /// The parsed UTC offset. |
| 161 | offset: ParsedOffset, |
| 162 | }, |
| 163 | } |
| 164 | |
| 165 | impl<'i> ParsedTimeZone<'i> { |
| 166 | /// Turns this parsed time zone into a structured time zone annotation. |
| 167 | /// |
| 168 | /// This can return an error if the parsed offset could not be converted |
| 169 | /// to a `crate::tz::Offset`. |
| 170 | /// |
| 171 | /// This also includes a flag of whether the annotation is "critical" or |
| 172 | /// not. |
| 173 | pub(crate) fn to_time_zone_annotation( |
| 174 | &self, |
| 175 | ) -> Result<TimeZoneAnnotation<'i>, Error> { |
| 176 | let (kind: TimeZoneAnnotationKind<'_>, critical: bool) = match *self { |
| 177 | ParsedTimeZone::Named { name: &str, critical: bool } => { |
| 178 | let kind: TimeZoneAnnotationKind<'_> = TimeZoneAnnotationKind::from(name); |
| 179 | (kind, critical) |
| 180 | } |
| 181 | ParsedTimeZone::Offset { ref offset: &ParsedOffset, critical: bool } => { |
| 182 | let kind: TimeZoneAnnotationKind<'_> = TimeZoneAnnotationKind::Offset(offset.to_offset()?); |
| 183 | (kind, critical) |
| 184 | } |
| 185 | }; |
| 186 | Ok(TimeZoneAnnotation { kind, critical }) |
| 187 | } |
| 188 | } |
| 189 | |
/// Parses RFC 9557 annotations.
#[derive(Debug)]
pub(crate) struct Parser {
    /// Placeholder field: this parser currently has no configuration
    /// options.
    _priv: (),
}
| 196 | |
| 197 | impl Parser { |
| 198 | /// Create a new RFC 9557 annotation parser with the default configuration. |
| 199 | pub(crate) const fn new() -> Parser { |
| 200 | Parser { _priv: () } |
| 201 | } |
| 202 | |
| 203 | /// Parse RFC 9557 annotations from the start of `input`. |
| 204 | /// |
| 205 | /// This only parses annotations when `input` starts with an `[`. |
| 206 | /// |
| 207 | /// Note that the result returned only provides access to the time zone |
| 208 | /// annotation (if it was present). All other annotations are parsed and |
| 209 | /// checked for validity, but are not accessible from `ParsedAnnotations` |
| 210 | /// since Jiff does not make use of them. |
| 211 | pub(crate) fn parse<'i>( |
| 212 | &self, |
| 213 | input: &'i [u8], |
| 214 | ) -> Result<Parsed<'i, ParsedAnnotations<'i>>, Error> { |
| 215 | let mkslice = parse::slicer(input); |
| 216 | |
| 217 | let Parsed { value: time_zone, mut input } = |
| 218 | self.parse_time_zone_annotation(input)?; |
| 219 | loop { |
| 220 | // We don't actually do anything with any annotation that isn't |
| 221 | // a time zone, but we do parse them to ensure validity and to |
| 222 | // be able to fail when a critical flag is set. Otherwise, we know |
| 223 | // we're done if parsing an annotation doesn't consume any input. |
| 224 | let Parsed { value: did_consume, input: unconsumed } = |
| 225 | self.parse_annotation(input)?; |
| 226 | if !did_consume { |
| 227 | break; |
| 228 | } |
| 229 | input = unconsumed; |
| 230 | } |
| 231 | |
| 232 | let value = ParsedAnnotations { |
| 233 | input: escape::Bytes(mkslice(input)), |
| 234 | time_zone, |
| 235 | }; |
| 236 | Ok(Parsed { value, input }) |
| 237 | } |
| 238 | |
| 239 | fn parse_time_zone_annotation<'i>( |
| 240 | &self, |
| 241 | mut input: &'i [u8], |
| 242 | ) -> Result<Parsed<'i, Option<ParsedTimeZone<'i>>>, Error> { |
| 243 | let unconsumed = input; |
| 244 | if input.is_empty() || input[0] != b'[' { |
| 245 | return Ok(Parsed { value: None, input: unconsumed }); |
| 246 | } |
| 247 | input = &input[1..]; |
| 248 | |
| 249 | let critical = input.starts_with(b"!" ); |
| 250 | if critical { |
| 251 | input = &input[1..]; |
| 252 | } |
| 253 | |
| 254 | // If we're starting with a `+` or `-`, then we know we MUST have a |
| 255 | // time zone offset annotation. It can't be anything else since neither |
| 256 | // an IANA annotation nor a generic key/value annotation can begin with |
| 257 | // a `+` or a `-`. |
| 258 | if input.starts_with(b"+" ) || input.starts_with(b"-" ) { |
| 259 | const P: offset::Parser = |
| 260 | offset::Parser::new().zulu(false).subminute(false); |
| 261 | |
| 262 | let Parsed { value: offset, input } = P.parse(input)?; |
| 263 | let Parsed { input, .. } = |
| 264 | self.parse_tz_annotation_close(input)?; |
| 265 | let value = Some(ParsedTimeZone::Offset { critical, offset }); |
| 266 | return Ok(Parsed { value, input }); |
| 267 | } |
| 268 | |
| 269 | // At this point, we know it's impossible to see an offset. But we |
| 270 | // could still see *either* an IANA time zone annotation or a more |
| 271 | // generic key-value annotation. We don't know yet. In the latter case, |
| 272 | // we'll eventually see an `=` sign. But since IANA time zone names |
| 273 | // represent a superset of generic keys, we just parse what we can. |
| 274 | // Once we stop, we can check for an `=`. |
| 275 | let mkiana = parse::slicer(input); |
| 276 | let Parsed { mut input, .. } = |
| 277 | self.parse_tz_annotation_iana_name(input)?; |
| 278 | // Now that we've parsed the first IANA name component, if this were |
| 279 | // actually a generic key/value annotation, the `=` *must* appear here. |
| 280 | // Otherwise, we assume we are trying to parse an IANA annotation as it |
| 281 | // is the only other possibility and likely the most common case. |
| 282 | if input.starts_with(b"=" ) { |
| 283 | // Pretend like we parsed nothing and let the caller try to parse |
| 284 | // a generic key/value annotation. |
| 285 | return Ok(Parsed { value: None, input: unconsumed }); |
| 286 | } |
| 287 | while input.starts_with(b"/" ) { |
| 288 | input = &input[1..]; |
| 289 | let Parsed { input: unconsumed, .. } = |
| 290 | self.parse_tz_annotation_iana_name(input)?; |
| 291 | input = unconsumed; |
| 292 | } |
| 293 | // This is OK because all bytes in a IANA TZ annotation are guaranteed |
| 294 | // to be ASCII, or else we wouldn't be here. If this turns out to be |
| 295 | // a perf issue, we can do an unchecked conversion here. But I figured |
| 296 | // it would be better to start conservative. |
| 297 | let iana_name = core::str::from_utf8(mkiana(input)).expect("ASCII" ); |
| 298 | let time_zone = |
| 299 | Some(ParsedTimeZone::Named { critical, name: iana_name }); |
| 300 | // And finally, parse the closing bracket. |
| 301 | let Parsed { input, .. } = self.parse_tz_annotation_close(input)?; |
| 302 | Ok(Parsed { value: time_zone, input }) |
| 303 | } |
| 304 | |
| 305 | fn parse_annotation<'i>( |
| 306 | &self, |
| 307 | mut input: &'i [u8], |
| 308 | ) -> Result<Parsed<'i, bool>, Error> { |
| 309 | if input.is_empty() || input[0] != b'[' { |
| 310 | return Ok(Parsed { value: false, input }); |
| 311 | } |
| 312 | input = &input[1..]; |
| 313 | |
| 314 | let critical = input.starts_with(b"!" ); |
| 315 | if critical { |
| 316 | input = &input[1..]; |
| 317 | } |
| 318 | |
| 319 | let Parsed { value: key, input } = self.parse_annotation_key(input)?; |
| 320 | let Parsed { input, .. } = self.parse_annotation_separator(input)?; |
| 321 | let Parsed { input, .. } = self.parse_annotation_values(input)?; |
| 322 | let Parsed { input, .. } = self.parse_annotation_close(input)?; |
| 323 | |
| 324 | // If the critical flag is set, then we automatically return an error |
| 325 | // because we don't support any non-time-zone annotations. When the |
| 326 | // critical flag isn't set, we're "permissive" and just validate that |
| 327 | // the syntax is correct (as we've already done at this point). |
| 328 | if critical { |
| 329 | return Err(err!( |
| 330 | "found unsupported RFC 9557 annotation with key {key:?} \ |
| 331 | with the critical flag ('!') set" , |
| 332 | key = escape::Bytes(key), |
| 333 | )); |
| 334 | } |
| 335 | |
| 336 | Ok(Parsed { value: true, input }) |
| 337 | } |
| 338 | |
| 339 | fn parse_tz_annotation_iana_name<'i>( |
| 340 | &self, |
| 341 | input: &'i [u8], |
| 342 | ) -> Result<Parsed<'i, &'i [u8]>, Error> { |
| 343 | let mkname = parse::slicer(input); |
| 344 | let Parsed { mut input, .. } = |
| 345 | self.parse_tz_annotation_leading_char(input)?; |
| 346 | loop { |
| 347 | let Parsed { value: did_consume, input: unconsumed } = |
| 348 | self.parse_tz_annotation_char(input); |
| 349 | if !did_consume { |
| 350 | break; |
| 351 | } |
| 352 | input = unconsumed; |
| 353 | } |
| 354 | Ok(Parsed { value: mkname(input), input }) |
| 355 | } |
| 356 | |
| 357 | fn parse_annotation_key<'i>( |
| 358 | &self, |
| 359 | input: &'i [u8], |
| 360 | ) -> Result<Parsed<'i, &'i [u8]>, Error> { |
| 361 | let mkkey = parse::slicer(input); |
| 362 | let Parsed { mut input, .. } = |
| 363 | self.parse_annotation_key_leading_char(input)?; |
| 364 | loop { |
| 365 | let Parsed { value: did_consume, input: unconsumed } = |
| 366 | self.parse_annotation_key_char(input); |
| 367 | if !did_consume { |
| 368 | break; |
| 369 | } |
| 370 | input = unconsumed; |
| 371 | } |
| 372 | Ok(Parsed { value: mkkey(input), input }) |
| 373 | } |
| 374 | |
| 375 | // N.B. If we ever actually need the values, this should probably return a |
| 376 | // `Vec<&'i [u8]>`. (Well, no, because that wouldn't be good for core-only |
| 377 | // configurations. So it will probably need to be something else. But, |
| 378 | // probably Jiff will never care about other values.) |
| 379 | fn parse_annotation_values<'i>( |
| 380 | &self, |
| 381 | input: &'i [u8], |
| 382 | ) -> Result<Parsed<'i, ()>, Error> { |
| 383 | let Parsed { mut input, .. } = self.parse_annotation_value(input)?; |
| 384 | while input.starts_with(b"-" ) { |
| 385 | input = &input[1..]; |
| 386 | let Parsed { input: unconsumed, .. } = |
| 387 | self.parse_annotation_value(input)?; |
| 388 | input = unconsumed; |
| 389 | } |
| 390 | Ok(Parsed { value: (), input }) |
| 391 | } |
| 392 | |
| 393 | fn parse_annotation_value<'i>( |
| 394 | &self, |
| 395 | input: &'i [u8], |
| 396 | ) -> Result<Parsed<'i, &'i [u8]>, Error> { |
| 397 | let mkvalue = parse::slicer(input); |
| 398 | let Parsed { mut input, .. } = |
| 399 | self.parse_annotation_value_leading_char(input)?; |
| 400 | loop { |
| 401 | let Parsed { value: did_consume, input: unconsumed } = |
| 402 | self.parse_annotation_value_char(input); |
| 403 | if !did_consume { |
| 404 | break; |
| 405 | } |
| 406 | input = unconsumed; |
| 407 | } |
| 408 | let value = mkvalue(input); |
| 409 | Ok(Parsed { value, input }) |
| 410 | } |
| 411 | |
| 412 | fn parse_tz_annotation_leading_char<'i>( |
| 413 | &self, |
| 414 | input: &'i [u8], |
| 415 | ) -> Result<Parsed<'i, ()>, Error> { |
| 416 | if input.is_empty() { |
| 417 | return Err(err!( |
| 418 | "expected the start of an RFC 9557 annotation or IANA \ |
| 419 | time zone component name, but found end of input instead" , |
| 420 | )); |
| 421 | } |
| 422 | if !matches!(input[0], b'_' | b'.' | b'A' ..=b'Z' | b'a' ..=b'z' ) { |
| 423 | return Err(err!( |
| 424 | "expected ASCII alphabetic byte (or underscore or period) \ |
| 425 | at the start of an RFC 9557 annotation or time zone \ |
| 426 | component name, but found {:?} instead" , |
| 427 | escape::Byte(input[0]), |
| 428 | )); |
| 429 | } |
| 430 | Ok(Parsed { value: (), input: &input[1..] }) |
| 431 | } |
| 432 | |
| 433 | fn parse_tz_annotation_char<'i>( |
| 434 | &self, |
| 435 | input: &'i [u8], |
| 436 | ) -> Parsed<'i, bool> { |
| 437 | let is_tz_annotation_char = |byte| { |
| 438 | matches!( |
| 439 | byte, |
| 440 | b'_' | b'.' | b'+' | b'-' | b'0' ..=b'9' | b'A' ..=b'Z' | b'a' ..=b'z' , |
| 441 | ) |
| 442 | }; |
| 443 | if input.is_empty() || !is_tz_annotation_char(input[0]) { |
| 444 | return Parsed { value: false, input }; |
| 445 | } |
| 446 | Parsed { value: true, input: &input[1..] } |
| 447 | } |
| 448 | |
| 449 | fn parse_annotation_key_leading_char<'i>( |
| 450 | &self, |
| 451 | input: &'i [u8], |
| 452 | ) -> Result<Parsed<'i, ()>, Error> { |
| 453 | if input.is_empty() { |
| 454 | return Err(err!( |
| 455 | "expected the start of an RFC 9557 annotation key, \ |
| 456 | but found end of input instead" , |
| 457 | )); |
| 458 | } |
| 459 | if !matches!(input[0], b'_' | b'a' ..=b'z' ) { |
| 460 | return Err(err!( |
| 461 | "expected lowercase alphabetic byte (or underscore) \ |
| 462 | at the start of an RFC 9557 annotation key, \ |
| 463 | but found {:?} instead" , |
| 464 | escape::Byte(input[0]), |
| 465 | )); |
| 466 | } |
| 467 | Ok(Parsed { value: (), input: &input[1..] }) |
| 468 | } |
| 469 | |
| 470 | fn parse_annotation_key_char<'i>( |
| 471 | &self, |
| 472 | input: &'i [u8], |
| 473 | ) -> Parsed<'i, bool> { |
| 474 | let is_annotation_key_char = |
| 475 | |byte| matches!(byte, b'_' | b'-' | b'0' ..=b'9' | b'a' ..=b'z' ); |
| 476 | if input.is_empty() || !is_annotation_key_char(input[0]) { |
| 477 | return Parsed { value: false, input }; |
| 478 | } |
| 479 | Parsed { value: true, input: &input[1..] } |
| 480 | } |
| 481 | |
| 482 | fn parse_annotation_value_leading_char<'i>( |
| 483 | &self, |
| 484 | input: &'i [u8], |
| 485 | ) -> Result<Parsed<'i, ()>, Error> { |
| 486 | if input.is_empty() { |
| 487 | return Err(err!( |
| 488 | "expected the start of an RFC 9557 annotation value, \ |
| 489 | but found end of input instead" , |
| 490 | )); |
| 491 | } |
| 492 | if !matches!(input[0], b'0' ..=b'9' | b'A' ..=b'Z' | b'a' ..=b'z' ) { |
| 493 | return Err(err!( |
| 494 | "expected alphanumeric ASCII byte \ |
| 495 | at the start of an RFC 9557 annotation value, \ |
| 496 | but found {:?} instead" , |
| 497 | escape::Byte(input[0]), |
| 498 | )); |
| 499 | } |
| 500 | Ok(Parsed { value: (), input: &input[1..] }) |
| 501 | } |
| 502 | |
| 503 | fn parse_annotation_value_char<'i>( |
| 504 | &self, |
| 505 | input: &'i [u8], |
| 506 | ) -> Parsed<'i, bool> { |
| 507 | let is_annotation_value_char = |
| 508 | |byte| matches!(byte, b'0' ..=b'9' | b'A' ..=b'Z' | b'a' ..=b'z' ); |
| 509 | if input.is_empty() || !is_annotation_value_char(input[0]) { |
| 510 | return Parsed { value: false, input }; |
| 511 | } |
| 512 | Parsed { value: true, input: &input[1..] } |
| 513 | } |
| 514 | |
| 515 | fn parse_annotation_separator<'i>( |
| 516 | &self, |
| 517 | input: &'i [u8], |
| 518 | ) -> Result<Parsed<'i, ()>, Error> { |
| 519 | if input.is_empty() { |
| 520 | return Err(err!( |
| 521 | "expected an '=' after parsing an RFC 9557 annotation key, \ |
| 522 | but found end of input instead" , |
| 523 | )); |
| 524 | } |
| 525 | if input[0] != b'=' { |
| 526 | // If we see a /, then it's likely the user was trying to insert a |
| 527 | // time zone annotation in the wrong place. |
| 528 | return Err(if input[0] == b'/' { |
| 529 | err!( |
| 530 | "expected an '=' after parsing an RFC 9557 annotation \ |
| 531 | key, but found / instead (time zone annotations must \ |
| 532 | come first)" , |
| 533 | ) |
| 534 | } else { |
| 535 | err!( |
| 536 | "expected an '=' after parsing an RFC 9557 annotation \ |
| 537 | key, but found {:?} instead" , |
| 538 | escape::Byte(input[0]), |
| 539 | ) |
| 540 | }); |
| 541 | } |
| 542 | Ok(Parsed { value: (), input: &input[1..] }) |
| 543 | } |
| 544 | |
| 545 | fn parse_annotation_close<'i>( |
| 546 | &self, |
| 547 | input: &'i [u8], |
| 548 | ) -> Result<Parsed<'i, ()>, Error> { |
| 549 | if input.is_empty() { |
| 550 | return Err(err!( |
| 551 | "expected an ']' after parsing an RFC 9557 annotation key \ |
| 552 | and value, but found end of input instead" , |
| 553 | )); |
| 554 | } |
| 555 | if input[0] != b']' { |
| 556 | return Err(err!( |
| 557 | "expected an ']' after parsing an RFC 9557 annotation key \ |
| 558 | and value, but found {:?} instead" , |
| 559 | escape::Byte(input[0]), |
| 560 | )); |
| 561 | } |
| 562 | Ok(Parsed { value: (), input: &input[1..] }) |
| 563 | } |
| 564 | |
| 565 | fn parse_tz_annotation_close<'i>( |
| 566 | &self, |
| 567 | input: &'i [u8], |
| 568 | ) -> Result<Parsed<'i, ()>, Error> { |
| 569 | if input.is_empty() { |
| 570 | return Err(err!( |
| 571 | "expected an ']' after parsing an RFC 9557 time zone \ |
| 572 | annotation, but found end of input instead" , |
| 573 | )); |
| 574 | } |
| 575 | if input[0] != b']' { |
| 576 | return Err(err!( |
| 577 | "expected an ']' after parsing an RFC 9557 time zone \ |
| 578 | annotation, but found {:?} instead" , |
| 579 | escape::Byte(input[0]), |
| 580 | )); |
| 581 | } |
| 582 | Ok(Parsed { value: (), input: &input[1..] }) |
| 583 | } |
| 584 | } |
| 585 | |
| 586 | #[cfg (test)] |
| 587 | mod tests { |
| 588 | use super::*; |
| 589 | |
| 590 | #[test ] |
| 591 | fn ok_time_zone() { |
| 592 | if crate::tz::db().is_definitively_empty() { |
| 593 | return; |
| 594 | } |
| 595 | |
| 596 | let p = |input| { |
| 597 | Parser::new() |
| 598 | .parse(input) |
| 599 | .unwrap() |
| 600 | .value |
| 601 | .to_time_zone_annotation() |
| 602 | .unwrap() |
| 603 | .map(|ann| (ann.to_time_zone().unwrap(), ann.is_critical())) |
| 604 | }; |
| 605 | |
| 606 | insta::assert_debug_snapshot!(p(b"[America/New_York]" ), @r###" |
| 607 | Some( |
| 608 | ( |
| 609 | TimeZone( |
| 610 | TZif( |
| 611 | "America/New_York", |
| 612 | ), |
| 613 | ), |
| 614 | false, |
| 615 | ), |
| 616 | ) |
| 617 | "### ); |
| 618 | insta::assert_debug_snapshot!(p(b"[!America/New_York]" ), @r###" |
| 619 | Some( |
| 620 | ( |
| 621 | TimeZone( |
| 622 | TZif( |
| 623 | "America/New_York", |
| 624 | ), |
| 625 | ), |
| 626 | true, |
| 627 | ), |
| 628 | ) |
| 629 | "### ); |
| 630 | insta::assert_debug_snapshot!(p(b"[america/new_york]" ), @r###" |
| 631 | Some( |
| 632 | ( |
| 633 | TimeZone( |
| 634 | TZif( |
| 635 | "America/New_York", |
| 636 | ), |
| 637 | ), |
| 638 | false, |
| 639 | ), |
| 640 | ) |
| 641 | "### ); |
| 642 | insta::assert_debug_snapshot!(p(b"[+25:59]" ), @r###" |
| 643 | Some( |
| 644 | ( |
| 645 | TimeZone( |
| 646 | 25:59:00, |
| 647 | ), |
| 648 | false, |
| 649 | ), |
| 650 | ) |
| 651 | "### ); |
| 652 | insta::assert_debug_snapshot!(p(b"[-25:59]" ), @r###" |
| 653 | Some( |
| 654 | ( |
| 655 | TimeZone( |
| 656 | -25:59:00, |
| 657 | ), |
| 658 | false, |
| 659 | ), |
| 660 | ) |
| 661 | "### ); |
| 662 | } |
| 663 | |
| 664 | #[test ] |
| 665 | fn ok_empty() { |
| 666 | let p = |input| Parser::new().parse(input).unwrap(); |
| 667 | |
| 668 | insta::assert_debug_snapshot!(p(b"" ), @r###" |
| 669 | Parsed { |
| 670 | value: ParsedAnnotations { |
| 671 | input: "", |
| 672 | time_zone: None, |
| 673 | }, |
| 674 | input: "", |
| 675 | } |
| 676 | "### ); |
| 677 | insta::assert_debug_snapshot!(p(b"blah" ), @r###" |
| 678 | Parsed { |
| 679 | value: ParsedAnnotations { |
| 680 | input: "", |
| 681 | time_zone: None, |
| 682 | }, |
| 683 | input: "blah", |
| 684 | } |
| 685 | "### ); |
| 686 | } |
| 687 | |
| 688 | #[test ] |
| 689 | fn ok_unsupported() { |
| 690 | let p = |input| Parser::new().parse(input).unwrap(); |
| 691 | |
| 692 | insta::assert_debug_snapshot!( |
| 693 | p(b"[u-ca=chinese]" ), |
| 694 | @r###" |
| 695 | Parsed { |
| 696 | value: ParsedAnnotations { |
| 697 | input: "[u-ca=chinese]", |
| 698 | time_zone: None, |
| 699 | }, |
| 700 | input: "", |
| 701 | } |
| 702 | "### , |
| 703 | ); |
| 704 | insta::assert_debug_snapshot!( |
| 705 | p(b"[u-ca=chinese-japanese]" ), |
| 706 | @r###" |
| 707 | Parsed { |
| 708 | value: ParsedAnnotations { |
| 709 | input: "[u-ca=chinese-japanese]", |
| 710 | time_zone: None, |
| 711 | }, |
| 712 | input: "", |
| 713 | } |
| 714 | "### , |
| 715 | ); |
| 716 | insta::assert_debug_snapshot!( |
| 717 | p(b"[u-ca=chinese-japanese-russian]" ), |
| 718 | @r###" |
| 719 | Parsed { |
| 720 | value: ParsedAnnotations { |
| 721 | input: "[u-ca=chinese-japanese-russian]", |
| 722 | time_zone: None, |
| 723 | }, |
| 724 | input: "", |
| 725 | } |
| 726 | "### , |
| 727 | ); |
| 728 | } |
| 729 | |
| 730 | #[test ] |
| 731 | fn ok_iana() { |
| 732 | let p = |input| Parser::new().parse(input).unwrap(); |
| 733 | |
| 734 | insta::assert_debug_snapshot!(p(b"[America/New_York]" ), @r###" |
| 735 | Parsed { |
| 736 | value: ParsedAnnotations { |
| 737 | input: "[America/New_York]", |
| 738 | time_zone: Some( |
| 739 | Named { |
| 740 | critical: false, |
| 741 | name: "America/New_York", |
| 742 | }, |
| 743 | ), |
| 744 | }, |
| 745 | input: "", |
| 746 | } |
| 747 | "### ); |
| 748 | insta::assert_debug_snapshot!(p(b"[!America/New_York]" ), @r###" |
| 749 | Parsed { |
| 750 | value: ParsedAnnotations { |
| 751 | input: "[!America/New_York]", |
| 752 | time_zone: Some( |
| 753 | Named { |
| 754 | critical: true, |
| 755 | name: "America/New_York", |
| 756 | }, |
| 757 | ), |
| 758 | }, |
| 759 | input: "", |
| 760 | } |
| 761 | "### ); |
| 762 | insta::assert_debug_snapshot!(p(b"[UTC]" ), @r###" |
| 763 | Parsed { |
| 764 | value: ParsedAnnotations { |
| 765 | input: "[UTC]", |
| 766 | time_zone: Some( |
| 767 | Named { |
| 768 | critical: false, |
| 769 | name: "UTC", |
| 770 | }, |
| 771 | ), |
| 772 | }, |
| 773 | input: "", |
| 774 | } |
| 775 | "### ); |
| 776 | insta::assert_debug_snapshot!(p(b"[.._foo_../.0+-]" ), @r###" |
| 777 | Parsed { |
| 778 | value: ParsedAnnotations { |
| 779 | input: "[.._foo_../.0+-]", |
| 780 | time_zone: Some( |
| 781 | Named { |
| 782 | critical: false, |
| 783 | name: ".._foo_../.0+-", |
| 784 | }, |
| 785 | ), |
| 786 | }, |
| 787 | input: "", |
| 788 | } |
| 789 | "### ); |
| 790 | } |
| 791 | |
| 792 | #[test ] |
| 793 | fn ok_offset() { |
| 794 | let p = |input| Parser::new().parse(input).unwrap(); |
| 795 | |
| 796 | insta::assert_debug_snapshot!(p(b"[-00]" ), @r###" |
| 797 | Parsed { |
| 798 | value: ParsedAnnotations { |
| 799 | input: "[-00]", |
| 800 | time_zone: Some( |
| 801 | Offset { |
| 802 | critical: false, |
| 803 | offset: ParsedOffset { |
| 804 | kind: Numeric( |
| 805 | -00, |
| 806 | ), |
| 807 | }, |
| 808 | }, |
| 809 | ), |
| 810 | }, |
| 811 | input: "", |
| 812 | } |
| 813 | "### ); |
| 814 | insta::assert_debug_snapshot!(p(b"[+00]" ), @r###" |
| 815 | Parsed { |
| 816 | value: ParsedAnnotations { |
| 817 | input: "[+00]", |
| 818 | time_zone: Some( |
| 819 | Offset { |
| 820 | critical: false, |
| 821 | offset: ParsedOffset { |
| 822 | kind: Numeric( |
| 823 | +00, |
| 824 | ), |
| 825 | }, |
| 826 | }, |
| 827 | ), |
| 828 | }, |
| 829 | input: "", |
| 830 | } |
| 831 | "### ); |
| 832 | insta::assert_debug_snapshot!(p(b"[-05]" ), @r###" |
| 833 | Parsed { |
| 834 | value: ParsedAnnotations { |
| 835 | input: "[-05]", |
| 836 | time_zone: Some( |
| 837 | Offset { |
| 838 | critical: false, |
| 839 | offset: ParsedOffset { |
| 840 | kind: Numeric( |
| 841 | -05, |
| 842 | ), |
| 843 | }, |
| 844 | }, |
| 845 | ), |
| 846 | }, |
| 847 | input: "", |
| 848 | } |
| 849 | "### ); |
| 850 | insta::assert_debug_snapshot!(p(b"[!+05:12]" ), @r###" |
| 851 | Parsed { |
| 852 | value: ParsedAnnotations { |
| 853 | input: "[!+05:12]", |
| 854 | time_zone: Some( |
| 855 | Offset { |
| 856 | critical: true, |
| 857 | offset: ParsedOffset { |
| 858 | kind: Numeric( |
| 859 | +05:12, |
| 860 | ), |
| 861 | }, |
| 862 | }, |
| 863 | ), |
| 864 | }, |
| 865 | input: "", |
| 866 | } |
| 867 | "### ); |
| 868 | } |
| 869 | |
| 870 | #[test ] |
| 871 | fn ok_iana_unsupported() { |
| 872 | let p = |input| Parser::new().parse(input).unwrap(); |
| 873 | |
| 874 | insta::assert_debug_snapshot!( |
| 875 | p(b"[America/New_York][u-ca=chinese-japanese-russian]" ), |
| 876 | @r###" |
| 877 | Parsed { |
| 878 | value: ParsedAnnotations { |
| 879 | input: "[America/New_York][u-ca=chinese-japanese-russian]", |
| 880 | time_zone: Some( |
| 881 | Named { |
| 882 | critical: false, |
| 883 | name: "America/New_York", |
| 884 | }, |
| 885 | ), |
| 886 | }, |
| 887 | input: "", |
| 888 | } |
| 889 | "### , |
| 890 | ); |
| 891 | } |
| 892 | |
| 893 | #[test ] |
| 894 | fn err_iana() { |
| 895 | insta::assert_snapshot!( |
| 896 | Parser::new().parse(b"[0/Foo]" ).unwrap_err(), |
| 897 | @r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "0" instead"### , |
| 898 | ); |
| 899 | insta::assert_snapshot!( |
| 900 | Parser::new().parse(b"[Foo/0Bar]" ).unwrap_err(), |
| 901 | @r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "0" instead"### , |
| 902 | ); |
| 903 | } |
| 904 | |
| 905 | #[test ] |
| 906 | fn err_offset() { |
| 907 | insta::assert_snapshot!( |
| 908 | Parser::new().parse(b"[+" ).unwrap_err(), |
| 909 | @r###"failed to parse hours in UTC numeric offset "+": expected two digit hour after sign, but found end of input"### , |
| 910 | ); |
| 911 | insta::assert_snapshot!( |
| 912 | Parser::new().parse(b"[+26]" ).unwrap_err(), |
| 913 | @r###"failed to parse hours in UTC numeric offset "+26]": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"### , |
| 914 | ); |
| 915 | insta::assert_snapshot!( |
| 916 | Parser::new().parse(b"[-26]" ).unwrap_err(), |
| 917 | @r###"failed to parse hours in UTC numeric offset "-26]": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"### , |
| 918 | ); |
| 919 | insta::assert_snapshot!( |
| 920 | Parser::new().parse(b"[+05:12:34]" ).unwrap_err(), |
| 921 | @r###"subminute precision for UTC numeric offset "+05:12:34]" is not enabled in this context (must provide only integral minutes)"### , |
| 922 | ); |
| 923 | insta::assert_snapshot!( |
| 924 | Parser::new().parse(b"[+05:12:34.123456789]" ).unwrap_err(), |
| 925 | @r###"subminute precision for UTC numeric offset "+05:12:34.123456789]" is not enabled in this context (must provide only integral minutes)"### , |
| 926 | ); |
| 927 | } |
| 928 | |
| 929 | #[test ] |
| 930 | fn err_critical_unsupported() { |
| 931 | insta::assert_snapshot!( |
| 932 | Parser::new().parse(b"[!u-ca=chinese]" ).unwrap_err(), |
| 933 | @r###"found unsupported RFC 9557 annotation with key "u-ca" with the critical flag ('!') set"### , |
| 934 | ); |
| 935 | } |
| 936 | |
#[test]
fn err_key_leading_char() {
    // Checks the error reported when the first byte after an opening
    // bracket is missing or invalid.
    let parse_err = |input| Parser::new().parse(input).unwrap_err();

    // First annotation: the message mentions both an annotation and a time
    // zone component name.
    insta::assert_snapshot!(
        parse_err(b"["),
        @"expected the start of an RFC 9557 annotation or IANA time zone component name, but found end of input instead",
    );
    insta::assert_snapshot!(
        parse_err(b"[&"),
        @r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "&" instead"###,
    );

    // Following a `[Foo]` annotation: the message only mentions an
    // annotation key.
    insta::assert_snapshot!(
        parse_err(b"[Foo]["),
        @"expected the start of an RFC 9557 annotation key, but found end of input instead",
    );
    insta::assert_snapshot!(
        parse_err(b"[Foo][&"),
        @r###"expected lowercase alphabetic byte (or underscore) at the start of an RFC 9557 annotation key, but found "&" instead"###,
    );
}
| 956 | |
#[test]
fn err_separator() {
    // Checks the error reported when the byte following a name/key is
    // missing or is not the separator the parser expects.
    let reject = |input| Parser::new().parse(input).unwrap_err();

    // First annotation: a closing ']' is expected.
    insta::assert_snapshot!(
        reject(b"[abc"),
        @"expected an ']' after parsing an RFC 9557 time zone annotation, but found end of input instead",
    );
    insta::assert_snapshot!(
        reject(b"[_abc"),
        @"expected an ']' after parsing an RFC 9557 time zone annotation, but found end of input instead",
    );
    insta::assert_snapshot!(
        reject(b"[abc^"),
        @r###"expected an ']' after parsing an RFC 9557 time zone annotation, but found "^" instead"###,
    );

    // Following a `[Foo]` annotation: an '=' is expected after the key.
    insta::assert_snapshot!(
        reject(b"[Foo][abc"),
        @"expected an '=' after parsing an RFC 9557 annotation key, but found end of input instead",
    );
    insta::assert_snapshot!(
        reject(b"[Foo][_abc"),
        @"expected an '=' after parsing an RFC 9557 annotation key, but found end of input instead",
    );
    insta::assert_snapshot!(
        reject(b"[Foo][abc^"),
        @r###"expected an '=' after parsing an RFC 9557 annotation key, but found "^" instead"###,
    );
}
| 984 | |
#[test]
fn err_value() {
    // Checks the error reported when the value after `key=` is absent or
    // begins with an invalid byte.

    // Input ends right after the '='.
    let eof_after_eq = Parser::new().parse(b"[abc=").unwrap_err();
    insta::assert_snapshot!(
        eof_after_eq,
        @"expected the start of an RFC 9557 annotation value, but found end of input instead",
    );
    let eof_after_eq_underscore = Parser::new().parse(b"[_abc=").unwrap_err();
    insta::assert_snapshot!(
        eof_after_eq_underscore,
        @"expected the start of an RFC 9557 annotation value, but found end of input instead",
    );

    // The value starts with a byte that is not alphanumeric.
    let caret_value = Parser::new().parse(b"[abc=^").unwrap_err();
    insta::assert_snapshot!(
        caret_value,
        @r###"expected alphanumeric ASCII byte at the start of an RFC 9557 annotation value, but found "^" instead"###,
    );
    let empty_value = Parser::new().parse(b"[abc=]").unwrap_err();
    insta::assert_snapshot!(
        empty_value,
        @r###"expected alphanumeric ASCII byte at the start of an RFC 9557 annotation value, but found "]" instead"###,
    );
}
| 1004 | |
#[test]
fn err_close() {
    // Checks the error reported when a `key=value` annotation is not
    // terminated by ']'.

    // Input ends before the closing bracket.
    let unterminated = Parser::new().parse(b"[abc=123").unwrap_err();
    insta::assert_snapshot!(
        unterminated,
        @"expected an ']' after parsing an RFC 9557 annotation key and value, but found end of input instead",
    );

    // A different byte appears where the closing bracket should be.
    let wrong_byte = Parser::new().parse(b"[abc=123*").unwrap_err();
    insta::assert_snapshot!(
        wrong_byte,
        @r###"expected an ']' after parsing an RFC 9557 annotation key and value, but found "*" instead"###,
    );
}
| 1016 | |
#[cfg(feature = "std")]
#[test]
fn err_time_zone_db_lookup() {
    // The snapshotted message depends on how the time zone database is
    // configured, so skip the check entirely unless a real tzdb is
    // available.
    if crate::tz::db().is_definitively_empty() {
        return;
    }

    // `[Foo]` parses successfully as a time zone annotation; the failure
    // only shows up when the annotation is resolved to a time zone.
    let parsed = Parser::new().parse(b"[Foo]").unwrap();
    let annotation = parsed.value.to_time_zone_annotation().unwrap().unwrap();
    let lookup_err = annotation.to_time_zone().unwrap_err();

    insta::assert_snapshot!(
        lookup_err,
        @"failed to find time zone `Foo` in time zone database",
    );
}
| 1043 | |
#[test]
fn err_repeated_time_zone() {
    // A second bracketed time zone name is rejected: after the first
    // annotation the parser expects `key=value`, and the error message
    // notes that time zone annotations must come first.
    let repeated = Parser::new()
        .parse(b"[america/new_york][america/new_york]")
        .unwrap_err();
    insta::assert_snapshot!(
        repeated,
        @"expected an '=' after parsing an RFC 9557 annotation key, but found / instead (time zone annotations must come first)",
    );
}
| 1052 | } |
| 1053 | |