1 | /*! |
2 | This module provides facilities for parsing time zone offsets. |
3 | |
4 | The parsing here follows primarily from [RFC 3339] and [ISO 8601], but also |
5 | from [Temporal's hybrid grammar]. |
6 | |
7 | [RFC 3339]: https://www.rfc-editor.org/rfc/rfc3339 |
8 | [ISO 8601]: https://www.iso.org/iso-8601-date-and-time-format.html |
9 | [Temporal's hybrid grammar]: https://tc39.es/proposal-temporal/#sec-temporal-iso8601grammar |
10 | */ |
11 | |
12 | // Here's the specific part of Temporal's grammar that is implemented below: |
13 | // |
14 | // # Parser::new().zulu(true).subminute(true).parse(b"...") |
15 | // DateTimeUTCOffset ::: |
16 | // UTCDesignator |
17 | // UTCOffsetSubMinutePrecision |
18 | // |
19 | // # Parser::new().zulu(false).subminute(false).parse(b"...") |
20 | // TimeZoneUTCOffsetName ::: |
21 | // UTCOffsetMinutePrecision |
22 | // |
23 | // UTCDesignator ::: one of |
24 | // Z z |
25 | // |
26 | // UTCOffsetSubMinutePrecision ::: |
27 | // UTCOffsetMinutePrecision |
28 | // UTCOffsetWithSubMinuteComponents[+Extended] |
29 | // UTCOffsetWithSubMinuteComponents[~Extended] |
30 | // |
31 | // UTCOffsetMinutePrecision ::: |
32 | // TemporalSign Hour |
33 | // TemporalSign Hour TimeSeparator[+Extended] MinuteSecond |
34 | // TemporalSign Hour TimeSeparator[~Extended] MinuteSecond |
35 | // |
36 | // UTCOffsetWithSubMinuteComponents[Extended] ::: |
37 | // TemporalSign Hour |
38 | // TimeSeparator[?Extended] MinuteSecond |
39 | // TimeSeparator[?Extended] MinuteSecond |
40 | // TemporalDecimalFraction[opt] |
41 | // |
42 | // TimeSeparator[Extended] ::: |
43 | // [+Extended] : |
44 | // [~Extended] [empty] |
45 | // |
46 | // TemporalSign ::: |
47 | // ASCIISign |
48 | // <MINUS> |
49 | // |
50 | // ASCIISign ::: one of |
51 | // + - |
52 | // |
53 | // Hour ::: |
54 | // 0 DecimalDigit |
55 | // 1 DecimalDigit |
56 | // 20 |
57 | // 21 |
58 | // 22 |
59 | // 23 |
60 | // |
61 | // MinuteSecond ::: |
62 | // 0 DecimalDigit |
63 | // 1 DecimalDigit |
64 | // 2 DecimalDigit |
65 | // 3 DecimalDigit |
66 | // 4 DecimalDigit |
67 | // 5 DecimalDigit |
68 | // |
69 | // DecimalDigit :: one of |
70 | // 0 1 2 3 4 5 6 7 8 9 |
71 | // |
72 | // TemporalDecimalFraction ::: |
73 | // TemporalDecimalSeparator DecimalDigit |
74 | // TemporalDecimalSeparator DecimalDigit DecimalDigit |
75 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
76 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
77 | // DecimalDigit |
78 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
79 | // DecimalDigit DecimalDigit |
80 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
81 | // DecimalDigit DecimalDigit DecimalDigit |
82 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
83 | // DecimalDigit DecimalDigit DecimalDigit |
84 | // DecimalDigit |
85 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
86 | // DecimalDigit DecimalDigit DecimalDigit |
87 | // DecimalDigit DecimalDigit |
88 | // TemporalDecimalSeparator DecimalDigit DecimalDigit DecimalDigit |
89 | // DecimalDigit DecimalDigit DecimalDigit |
90 | // DecimalDigit DecimalDigit DecimalDigit |
91 | // TemporalDecimalSeparator ::: one of |
92 | // . , |
93 | // |
94 | // The quick summary of the above is that offsets up to nanosecond precision |
95 | // are supported. The general format is `{+,-}HH[:MM[:SS[.NNNNNNNNN]]]`. But |
96 | // ISO 8601 extended or basic formats are also supported. For example, the |
97 | // basic format `-0530` is equivalent to the extended format `-05:30`. |
98 | // |
99 | // Note that even though we support parsing up to nanosecond precision, Jiff |
100 | // currently only supports offsets up to second precision. I don't think there |
101 | // is any real practical need for any greater precision, but I don't think it |
102 | // would be too hard to switch an `Offset` from an `i32` representation in |
// seconds to an `i64` representation in nanoseconds. (Since it only needs to
104 | // support a span of time of about 52 hours or so.) |
105 | |
106 | use crate::{ |
107 | error::{err, Error, ErrorContext}, |
108 | fmt::{ |
109 | temporal::{PiecesNumericOffset, PiecesOffset}, |
110 | util::{parse_temporal_fraction, FractionalFormatter}, |
111 | Parsed, |
112 | }, |
113 | tz::Offset, |
114 | util::{ |
115 | escape, parse, |
116 | rangeint::{ri8, RFrom}, |
117 | t::{self, C}, |
118 | }, |
119 | }; |
120 | |
// We define our own ranged types because we want them to only be positive. We
// represent the sign explicitly as a separate field. But the range supported
// is the same as the component fields of `Offset`.
//
// Each alias is a ranged `i8` starting at `0` whose upper bound is borrowed
// from the corresponding `t::SpanZoneOffset*::MAX` constant. Going by the
// snapshot tests in this file, that works out to `0..=25` for hours and
// `0..=59` for minutes.
type ParsedOffsetHours = ri8<0, { t::SpanZoneOffsetHours::MAX }>;
type ParsedOffsetMinutes = ri8<0, { t::SpanZoneOffsetMinutes::MAX }>;
type ParsedOffsetSeconds = ri8<0, { t::SpanZoneOffsetSeconds::MAX }>;
127 | |
/// An offset that has been parsed from a datetime string.
///
/// This represents either a Zulu offset (corresponding to UTC with an unknown
/// time zone offset), or a specific numeric offset given in hours, minutes,
/// seconds and nanoseconds (with everything except hours being optional).
///
/// The parsed form is kept as-is. Conversion to an `Offset` (including any
/// rounding of a fractional second) only happens when `to_offset` or
/// `to_pieces_offset` is called.
#[derive (Debug)]
pub(crate) struct ParsedOffset {
    /// The kind of offset parsed: either the Zulu designator or an explicit
    /// numeric offset.
    kind: ParsedOffsetKind,
}
138 | |
139 | impl ParsedOffset { |
140 | /// Convert a parsed offset into a Jiff offset. |
141 | /// |
142 | /// If the offset was parsed from a Zulu designator, then the offset |
143 | /// returned is indistinguishable from `+00` or `-00`. |
144 | /// |
145 | /// # Errors |
146 | /// |
147 | /// A variety of parsing errors are possible. |
148 | /// |
149 | /// Also, beyond normal range checks on the allowed components of a UTC |
150 | /// offset, this does rounding based on the fractional nanosecond part. As |
151 | /// a result, if the parsed value would be rounded to a value not in bounds |
152 | /// for a Jiff offset, this returns an error. |
153 | pub(crate) fn to_offset(&self) -> Result<Offset, Error> { |
154 | match self.kind { |
155 | ParsedOffsetKind::Zulu => Ok(Offset::UTC), |
156 | ParsedOffsetKind::Numeric(ref numeric) => numeric.to_offset(), |
157 | } |
158 | } |
159 | |
160 | /// Convert a parsed offset to a more structured representation. |
161 | /// |
162 | /// This is like `to_offset`, but preserves `Z` and `-00:00` versus |
163 | /// `+00:00`. This does still attempt to create an `Offset`, and that |
164 | /// construction can fail. |
165 | pub(crate) fn to_pieces_offset(&self) -> Result<PiecesOffset, Error> { |
166 | match self.kind { |
167 | ParsedOffsetKind::Zulu => Ok(PiecesOffset::Zulu), |
168 | ParsedOffsetKind::Numeric(ref numeric) => { |
169 | let mut off = PiecesNumericOffset::from(numeric.to_offset()?); |
170 | if numeric.sign < C(0) { |
171 | off = off.with_negative_zero(); |
172 | } |
173 | Ok(PiecesOffset::from(off)) |
174 | } |
175 | } |
176 | } |
177 | |
178 | /// Whether this parsed offset corresponds to Zulu time or not. |
179 | /// |
180 | /// This is useful in error reporting for parsing civil times. Namely, we |
181 | /// report an error when parsing a civil time with a Zulu offset since it |
182 | /// is almost always the wrong thing to do. |
183 | pub(crate) fn is_zulu(&self) -> bool { |
184 | matches!(self.kind, ParsedOffsetKind::Zulu) |
185 | } |
186 | } |
187 | |
/// The kind of a parsed offset.
///
/// The parser produces `Zulu` when the input starts with `Z` or `z`, and
/// `Numeric` for everything that goes through the numeric offset parser.
#[derive (Debug)]
enum ParsedOffsetKind {
    /// The zulu offset, corresponding to UTC in a context where the offset for
    /// civil time is unknown or unavailable.
    Zulu,
    /// The specific numeric offset.
    Numeric(Numeric),
}
197 | |
/// A numeric representation of a UTC offset.
///
/// The components are stored exactly as they were parsed: an explicit sign
/// plus non-negative magnitudes. Nothing is rounded or combined until
/// `to_offset` is called.
///
/// Both `Display` and `Debug` are implemented by hand for this type (the
/// latter simply defers to the former).
struct Numeric {
    /// The sign that was parsed from the numeric UTC offset. This is always
    /// either `1` or `-1`, never `0`.
    sign: t::Sign,
    /// The hours component. This is non-optional because every UTC offset must
    /// have at least hours.
    hours: ParsedOffsetHours,
    /// The minutes component.
    minutes: Option<ParsedOffsetMinutes>,
    /// The seconds component. This is only possible when subminute resolution
    /// is enabled.
    seconds: Option<ParsedOffsetSeconds>,
    /// The nanoseconds fractional component. This is only possible when
    /// subminute resolution is enabled.
    nanoseconds: Option<t::SubsecNanosecond>,
}
215 | |
216 | impl Numeric { |
217 | /// Convert a parsed numeric offset into a Jiff offset. |
218 | /// |
219 | /// This does rounding based on the fractional nanosecond part. As a |
220 | /// result, if the parsed value would be rounded to a value not in bounds |
221 | /// for a Jiff offset, this returns an error. |
222 | fn to_offset(&self) -> Result<Offset, Error> { |
223 | let mut seconds = t::SpanZoneOffset::rfrom(C(3_600) * self.hours); |
224 | if let Some(part_minutes) = self.minutes { |
225 | seconds += C(60) * part_minutes; |
226 | } |
227 | if let Some(part_seconds) = self.seconds { |
228 | seconds += part_seconds; |
229 | } |
230 | if let Some(part_nanoseconds) = self.nanoseconds { |
231 | if part_nanoseconds >= C(500_000_000) { |
232 | seconds = seconds |
233 | .try_checked_add("offset-seconds" , C(1)) |
234 | .with_context(|| { |
235 | err!( |
236 | "due to precision loss, UTC offset ' {}' is \ |
237 | rounded to a value that is out of bounds" , |
238 | self, |
239 | ) |
240 | })?; |
241 | } |
242 | } |
243 | Ok(Offset::from_seconds_ranged(seconds * self.sign)) |
244 | } |
245 | } |
246 | |
247 | // This impl is just used for error messages when converting a `Numeric` to an |
248 | // `Offset` fails. |
249 | impl core::fmt::Display for Numeric { |
250 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
251 | if self.sign == C(constant:-1) { |
252 | write!(f, "-" )?; |
253 | } else { |
254 | write!(f, "+" )?; |
255 | } |
256 | write!(f, " {:02}" , self.hours)?; |
257 | if let Some(minutes: ri8<0, _>) = self.minutes { |
258 | write!(f, ": {:02}" , minutes)?; |
259 | } |
260 | if let Some(seconds: ri8<0, _>) = self.seconds { |
261 | write!(f, ": {:02}" , seconds)?; |
262 | } |
263 | if let Some(nanos: ri32<0, _>) = self.nanoseconds { |
264 | static FMT: FractionalFormatter = FractionalFormatter::new(); |
265 | write!(f, ". {}" , FMT.format(i64::from(nanos)).as_str())?; |
266 | } |
267 | Ok(()) |
268 | } |
269 | } |
270 | |
271 | // We give a succinct Debug impl (identical to Display) to make snapshot |
272 | // testing a bit nicer. |
273 | impl core::fmt::Debug for Numeric { |
274 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
275 | core::fmt::Display::fmt(self, f) |
276 | } |
277 | } |
278 | |
/// A parser for UTC offsets.
///
/// At time of writing, the typical configuration for offset parsing is to
/// enable Zulu support and subminute precision. But when parsing zoned
/// datetimes, and specifically, offsets within time zone annotations (the RFC
/// 9557 extension to RFC 3339), then neither zulu nor subminute support are
/// enabled.
///
/// N.B. I'm not actually totally clear on why zulu/subminute aren't allowed in
/// time zone annotations, but that's what Temporal's grammar seems to dictate.
/// One might argue that this is what RFCs 3339 and 9557 require, but the
/// Temporal grammar is already recognizing a superset anyway.
#[derive (Debug)]
pub(crate) struct Parser {
    /// Whether the `Z`/`z` designator is accepted. When false, finding one
    /// is reported as an error.
    zulu: bool,
    /// Whether a seconds component may follow the minutes. When false, a `:`
    /// after the minutes is reported as an error.
    subminute: bool,
    /// Whether a fractional second may follow the seconds. Only consulted
    /// when `subminute` is true. When false, a `.` or `,` after the seconds
    /// is reported as an error.
    subsecond: bool,
}
297 | |
298 | impl Parser { |
299 | /// Create a new UTC offset parser with the default configuration. |
300 | pub(crate) const fn new() -> Parser { |
301 | Parser { zulu: true, subminute: true, subsecond: true } |
302 | } |
303 | |
304 | /// When enabled, the `z` and `Z` designators are recognized as a "zulu" |
305 | /// indicator for UTC when the civil time offset is unknown or unavailable. |
306 | /// |
307 | /// When disabled, neither `z` nor `Z` will be recognized and a parser |
308 | /// error will occur if one is found. |
309 | /// |
310 | /// This is enabled by default. |
311 | pub(crate) const fn zulu(self, yes: bool) -> Parser { |
312 | Parser { zulu: yes, ..self } |
313 | } |
314 | |
315 | /// When enabled, offsets with precision greater than integral minutes |
316 | /// are supported. Specifically, when enabled, nanosecond precision is |
317 | /// supported. |
318 | /// |
319 | /// When disabled, offsets must be integral minutes. And the `subsecond` |
320 | /// option is ignored. |
321 | pub(crate) const fn subminute(self, yes: bool) -> Parser { |
322 | Parser { subminute: yes, ..self } |
323 | } |
324 | |
325 | /// When enabled, offsets with precision greater than integral seconds |
326 | /// are supported. Specifically, when enabled, nanosecond precision is |
327 | /// supported. Note though that when a fractional second is found, it is |
328 | /// used to round to the nearest second. (Jiff's `Offset` type only has |
329 | /// second resolution.) |
330 | /// |
331 | /// When disabled, offsets must be integral seconds (or integrate minutes |
332 | /// if the `subminute` option is disabled as well). |
333 | /// |
334 | /// This is ignored if `subminute` is disabled. |
335 | pub(crate) const fn subsecond(self, yes: bool) -> Parser { |
336 | Parser { subsecond: yes, ..self } |
337 | } |
338 | |
339 | /// Parse an offset from the beginning of `input`. |
340 | /// |
341 | /// If no offset could be found or it was otherwise invalid, then an error |
342 | /// is returned. |
343 | /// |
344 | /// In general, parsing stops when, after all required components are seen, |
345 | /// an optional component is not present (either because of the end of the |
346 | /// input or because of a character that cannot possibly begin said optional |
347 | /// component). This does mean that there are some corner cases where error |
348 | /// messages will not be as good as they possibly can be. But there are |
349 | /// two exceptions here: |
350 | /// |
351 | /// 1. When Zulu support is disabled and a `Z` or `z` are found, then an |
352 | /// error is returned indicating that `Z` was recognized but specifically |
353 | /// not allowed. |
354 | /// 2. When subminute precision is disabled and a `:` is found after the |
355 | /// minutes component, then an error is returned indicating that the |
356 | /// seconds component was recognized but specifically not allowed. |
357 | /// |
358 | /// Otherwise, for example, if `input` is `-0512:34`, then the `-0512` |
359 | /// will be parsed as `-5 hours, 12 minutes` with an offset of `5`. |
360 | /// Presumably, whatever higher level parser is invoking this routine will |
361 | /// then see an unexpected `:`. But it's likely that a better error message |
362 | /// would call out the fact that mixed basic and extended formats (from |
363 | /// ISO 8601) aren't allowed, and that the offset needs to be written as |
364 | /// either `-05:12:34` or `-051234`. But... these are odd corner cases, so |
365 | /// we abide them. |
366 | pub(crate) fn parse<'i>( |
367 | &self, |
368 | mut input: &'i [u8], |
369 | ) -> Result<Parsed<'i, ParsedOffset>, Error> { |
370 | if input.is_empty() { |
371 | return Err(err!("expected UTC offset, but found end of input" )); |
372 | } |
373 | |
374 | if input[0] == b'Z' || input[0] == b'z' { |
375 | if !self.zulu { |
376 | return Err(err!( |
377 | "found {z:?} in {original:?} where a numeric UTC offset \ |
378 | was expected (this context does not permit \ |
379 | the Zulu offset)" , |
380 | z = escape::Byte(input[0]), |
381 | original = escape::Bytes(input), |
382 | )); |
383 | } |
384 | input = &input[1..]; |
385 | let value = ParsedOffset { kind: ParsedOffsetKind::Zulu }; |
386 | return Ok(Parsed { value, input }); |
387 | } |
388 | let Parsed { value: numeric, input } = self.parse_numeric(input)?; |
389 | let value = ParsedOffset { kind: ParsedOffsetKind::Numeric(numeric) }; |
390 | Ok(Parsed { value, input }) |
391 | } |
392 | |
393 | /// Like `parse`, but will return `None` if `input` cannot possibly start |
394 | /// with an offset. |
395 | /// |
396 | /// Basically, if `input` is empty, or is not one of `z`, `Z`, `+` or `-` |
397 | /// then this returns `None`. |
398 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
399 | pub(crate) fn parse_optional<'i>( |
400 | &self, |
401 | input: &'i [u8], |
402 | ) -> Result<Parsed<'i, Option<ParsedOffset>>, Error> { |
403 | let Some(first) = input.first().copied() else { |
404 | return Ok(Parsed { value: None, input }); |
405 | }; |
406 | if !matches!(first, b'z' | b'Z' | b'+' | b'-' ) { |
407 | return Ok(Parsed { value: None, input }); |
408 | } |
409 | let Parsed { value, input } = self.parse(input)?; |
410 | Ok(Parsed { value: Some(value), input }) |
411 | } |
412 | |
413 | /// Parses a numeric offset from the beginning of `input`. |
414 | /// |
415 | /// The beginning of the input is expected to start with a `+` or a `-`. |
416 | /// Any other case (including an empty string) will result in an error. |
417 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
418 | fn parse_numeric<'i>( |
419 | &self, |
420 | input: &'i [u8], |
421 | ) -> Result<Parsed<'i, Numeric>, Error> { |
422 | let original = escape::Bytes(input); |
423 | |
424 | // Parse sign component. |
425 | let Parsed { value: sign, input } = |
426 | self.parse_sign(input).with_context(|| { |
427 | err!("failed to parse sign in UTC numeric offset {original:?}" ) |
428 | })?; |
429 | |
430 | // Parse hours component. |
431 | let Parsed { value: hours, input } = |
432 | self.parse_hours(input).with_context(|| { |
433 | err!( |
434 | "failed to parse hours in UTC numeric offset {original:?}" |
435 | ) |
436 | })?; |
437 | let extended = input.starts_with(b":" ); |
438 | |
439 | // Start building up our numeric offset value. |
440 | let mut numeric = Numeric { |
441 | sign, |
442 | hours, |
443 | minutes: None, |
444 | seconds: None, |
445 | nanoseconds: None, |
446 | }; |
447 | |
448 | // Parse optional separator after hours. |
449 | let Parsed { value: has_minutes, input } = |
450 | self.parse_separator(input, extended).with_context(|| { |
451 | err!( |
452 | "failed to parse separator after hours in \ |
453 | UTC numeric offset {original:?}" |
454 | ) |
455 | })?; |
456 | if !has_minutes { |
457 | return Ok(Parsed { value: numeric, input }); |
458 | } |
459 | |
460 | // Parse minutes component. |
461 | let Parsed { value: minutes, input } = |
462 | self.parse_minutes(input).with_context(|| { |
463 | err!( |
464 | "failed to parse minutes in UTC numeric offset \ |
465 | {original:?}" |
466 | ) |
467 | })?; |
468 | numeric.minutes = Some(minutes); |
469 | |
470 | // If subminute resolution is not supported, then we're done here. |
471 | if !self.subminute { |
472 | // While we generally try to "stop" parsing once we're done |
473 | // seeing things we expect, in this case, if we see a colon, it |
474 | // almost certainly indicates that someone has tried to provide |
475 | // more precision than is supported. So we return an error here. |
476 | // If this winds up being problematic, we can make this error |
477 | // configuration or remove it altogether (unfortunate). |
478 | if input.get(0).map_or(false, |&b| b == b':' ) { |
479 | return Err(err!( |
480 | "subminute precision for UTC numeric offset {original:?} \ |
481 | is not enabled in this context (must provide only \ |
482 | integral minutes)" , |
483 | )); |
484 | } |
485 | return Ok(Parsed { value: numeric, input }); |
486 | } |
487 | |
488 | // Parse optional separator after minutes. |
489 | let Parsed { value: has_seconds, input } = |
490 | self.parse_separator(input, extended).with_context(|| { |
491 | err!( |
492 | "failed to parse separator after minutes in \ |
493 | UTC numeric offset {original:?}" |
494 | ) |
495 | })?; |
496 | if !has_seconds { |
497 | return Ok(Parsed { value: numeric, input }); |
498 | } |
499 | |
500 | // Parse seconds component. |
501 | let Parsed { value: seconds, input } = |
502 | self.parse_seconds(input).with_context(|| { |
503 | err!( |
504 | "failed to parse seconds in UTC numeric offset \ |
505 | {original:?}" |
506 | ) |
507 | })?; |
508 | numeric.seconds = Some(seconds); |
509 | |
510 | // If subsecond resolution is not supported, then we're done here. |
511 | if !self.subsecond { |
512 | if input.get(0).map_or(false, |&b| b == b'.' || b == b',' ) { |
513 | return Err(err!( |
514 | "subsecond precision for UTC numeric offset {original:?} \ |
515 | is not enabled in this context (must provide only \ |
516 | integral minutes or seconds)" , |
517 | )); |
518 | } |
519 | return Ok(Parsed { value: numeric, input }); |
520 | } |
521 | |
522 | // Parse an optional fractional component. |
523 | let Parsed { value: nanoseconds, input } = |
524 | parse_temporal_fraction(input).with_context(|| { |
525 | err!( |
526 | "failed to parse fractional nanoseconds in \ |
527 | UTC numeric offset {original:?}" , |
528 | ) |
529 | })?; |
530 | numeric.nanoseconds = nanoseconds; |
531 | Ok(Parsed { value: numeric, input }) |
532 | } |
533 | |
534 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
535 | fn parse_sign<'i>( |
536 | &self, |
537 | input: &'i [u8], |
538 | ) -> Result<Parsed<'i, t::Sign>, Error> { |
539 | let sign = input.get(0).copied().ok_or_else(|| { |
540 | err!("expected UTC numeric offset, but found end of input" ) |
541 | })?; |
542 | let sign = if sign == b'+' { |
543 | t::Sign::N::<1>() |
544 | } else if sign == b'-' { |
545 | t::Sign::N::<-1>() |
546 | } else { |
547 | return Err(err!( |
548 | "expected '+' or '-' sign at start of UTC numeric offset, \ |
549 | but found {found:?} instead" , |
550 | found = escape::Byte(sign), |
551 | )); |
552 | }; |
553 | Ok(Parsed { value: sign, input: &input[1..] }) |
554 | } |
555 | |
556 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
557 | fn parse_hours<'i>( |
558 | &self, |
559 | input: &'i [u8], |
560 | ) -> Result<Parsed<'i, ParsedOffsetHours>, Error> { |
561 | let (hours, input) = parse::split(input, 2).ok_or_else(|| { |
562 | err!("expected two digit hour after sign, but found end of input" ,) |
563 | })?; |
564 | let hours = parse::i64(hours).with_context(|| { |
565 | err!( |
566 | "failed to parse {hours:?} as hours (a two digit integer)" , |
567 | hours = escape::Bytes(hours), |
568 | ) |
569 | })?; |
570 | // Note that we support a slightly bigger range of offsets than |
571 | // Temporal. Temporal seems to support only up to 23 hours, but |
572 | // we go up to 25 hours. This is done to support POSIX time zone |
573 | // strings, which also require 25 hours (plus the maximal minute/second |
574 | // components). |
575 | let hours = ParsedOffsetHours::try_new("hours" , hours) |
576 | .context("offset hours are not valid" )?; |
577 | Ok(Parsed { value: hours, input }) |
578 | } |
579 | |
580 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
581 | fn parse_minutes<'i>( |
582 | &self, |
583 | input: &'i [u8], |
584 | ) -> Result<Parsed<'i, ParsedOffsetMinutes>, Error> { |
585 | let (minutes, input) = parse::split(input, 2).ok_or_else(|| { |
586 | err!( |
587 | "expected two digit minute after hours, \ |
588 | but found end of input" , |
589 | ) |
590 | })?; |
591 | let minutes = parse::i64(minutes).with_context(|| { |
592 | err!( |
593 | "failed to parse {minutes:?} as minutes (a two digit integer)" , |
594 | minutes = escape::Bytes(minutes), |
595 | ) |
596 | })?; |
597 | let minutes = ParsedOffsetMinutes::try_new("minutes" , minutes) |
598 | .context("minutes are not valid" )?; |
599 | Ok(Parsed { value: minutes, input }) |
600 | } |
601 | |
602 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
603 | fn parse_seconds<'i>( |
604 | &self, |
605 | input: &'i [u8], |
606 | ) -> Result<Parsed<'i, ParsedOffsetSeconds>, Error> { |
607 | let (seconds, input) = parse::split(input, 2).ok_or_else(|| { |
608 | err!( |
609 | "expected two digit second after hours, \ |
610 | but found end of input" , |
611 | ) |
612 | })?; |
613 | let seconds = parse::i64(seconds).with_context(|| { |
614 | err!( |
615 | "failed to parse {seconds:?} as seconds (a two digit integer)" , |
616 | seconds = escape::Bytes(seconds), |
617 | ) |
618 | })?; |
619 | let seconds = ParsedOffsetSeconds::try_new("seconds" , seconds) |
620 | .context("time zone offset seconds are not valid" )?; |
621 | Ok(Parsed { value: seconds, input }) |
622 | } |
623 | |
624 | /// Parses a separator between hours/minutes or minutes/seconds. When |
625 | /// `true` is returned, we expect to parse the next component. When `false` |
626 | /// is returned, then no separator was found and there is no expectation of |
627 | /// finding another component. |
628 | /// |
629 | /// When in extended mode, true is returned if and only if a separator is |
630 | /// found. |
631 | /// |
632 | /// When in basic mode (not extended), then a subsequent component is only |
633 | /// expected when `input` begins with two ASCII digits. |
634 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
635 | fn parse_separator<'i>( |
636 | &self, |
637 | mut input: &'i [u8], |
638 | extended: bool, |
639 | ) -> Result<Parsed<'i, bool>, Error> { |
640 | if !extended { |
641 | let expected = |
642 | input.len() >= 2 && input[..2].iter().all(u8::is_ascii_digit); |
643 | return Ok(Parsed { value: expected, input }); |
644 | } |
645 | let is_separator = input.get(0).map_or(false, |&b| b == b':' ); |
646 | if is_separator { |
647 | input = &input[1..]; |
648 | } |
649 | Ok(Parsed { value: is_separator, input }) |
650 | } |
651 | } |
652 | |
653 | #[cfg (test)] |
654 | mod tests { |
655 | use crate::util::rangeint::RInto; |
656 | |
657 | use super::*; |
658 | |
// Both spellings of the Zulu designator are accepted by the default parser
// configuration. Each consumes exactly one byte and yields the `Zulu` kind.
#[test ]
fn ok_zulu() {
    let p = |input| Parser::new().parse(input).unwrap();

    insta::assert_debug_snapshot!(p(b"Z" ), @r###"
    Parsed {
        value: ParsedOffset {
            kind: Zulu,
        },
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"z" ), @r###"
    Parsed {
        value: ParsedOffset {
            kind: Zulu,
        },
        input: "",
    }
    "### );
}
680 | |
// A numeric offset that goes through the top-level `parse` entry point is
// wrapped in the `Numeric` kind. The inner value is shown in its compact
// form because `Debug` for `Numeric` defers to `Display`.
#[test ]
fn ok_numeric() {
    let p = |input| Parser::new().parse(input).unwrap();

    insta::assert_debug_snapshot!(p(b"-05" ), @r###"
    Parsed {
        value: ParsedOffset {
            kind: Numeric(
                -05,
            ),
        },
        input: "",
    }
    "### );
}
696 | |
// Successful parse tests where the offset ends at the end of the string.
//
// This covers both signs, the basic and extended formats at every level of
// precision, and both decimal separators. Note that a fraction written with
// `,` is still displayed with `.`.
#[test ]
fn ok_numeric_complete() {
    let p = |input| Parser::new().parse_numeric(input).unwrap();

    insta::assert_debug_snapshot!(p(b"-05" ), @r###"
    Parsed {
        value: -05,
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"+05" ), @r###"
    Parsed {
        value: +05,
        input: "",
    }
    "### );

    insta::assert_debug_snapshot!(p(b"+25:59" ), @r###"
    Parsed {
        value: +25:59,
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"+2559" ), @r###"
    Parsed {
        value: +25:59,
        input: "",
    }
    "### );

    insta::assert_debug_snapshot!(p(b"+25:59:59" ), @r###"
    Parsed {
        value: +25:59:59,
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"+255959" ), @r###"
    Parsed {
        value: +25:59:59,
        input: "",
    }
    "### );

    insta::assert_debug_snapshot!(p(b"+25:59:59.999" ), @r###"
    Parsed {
        value: +25:59:59.999,
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"+25:59:59,999" ), @r###"
    Parsed {
        value: +25:59:59.999,
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"+255959.999" ), @r###"
    Parsed {
        value: +25:59:59.999,
        input: "",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"+255959,999" ), @r###"
    Parsed {
        value: +25:59:59.999,
        input: "",
    }
    "### );

    insta::assert_debug_snapshot!(p(b"+25:59:59.999999999" ), @r###"
    Parsed {
        value: +25:59:59.999999999,
        input: "",
    }
    "### );
}
773 | |
// Successful parse tests where the offset ends before the end of the
// string.
//
// In each case, parsing stops at the first byte that cannot continue the
// offset and that byte is handed back as the remaining input. Of note: a
// decimal separator directly after the minutes is not consumed, and neither
// is a `:` after minutes that were written in the basic format.
#[test ]
fn ok_numeric_incomplete() {
    let p = |input| Parser::new().parse_numeric(input).unwrap();

    insta::assert_debug_snapshot!(p(b"-05a" ), @r###"
    Parsed {
        value: -05,
        input: "a",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12a" ), @r###"
    Parsed {
        value: -05:12,
        input: "a",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12." ), @r###"
    Parsed {
        value: -05:12,
        input: ".",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12," ), @r###"
    Parsed {
        value: -05:12,
        input: ",",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-0512a" ), @r###"
    Parsed {
        value: -05:12,
        input: "a",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-0512:" ), @r###"
    Parsed {
        value: -05:12,
        input: ":",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12:34a" ), @r###"
    Parsed {
        value: -05:12:34,
        input: "a",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12:34.9a" ), @r###"
    Parsed {
        value: -05:12:34.9,
        input: "a",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12:34.9." ), @r###"
    Parsed {
        value: -05:12:34.9,
        input: ".",
    }
    "### );
    insta::assert_debug_snapshot!(p(b"-05:12:34.9," ), @r###"
    Parsed {
        value: -05:12:34.9,
        input: ",",
    }
    "### );
}
841 | |
// An empty string is invalid. The parser is written from the perspective
// that if it's called, then the caller expects a numeric UTC offset at
// that position. The failure surfaces while parsing the sign, since that is
// the first component attempted.
#[test ]
fn err_numeric_empty() {
    insta::assert_snapshot!(
        Parser::new().parse_numeric(b"" ).unwrap_err(),
        @r###"failed to parse sign in UTC numeric offset "": expected UTC numeric offset, but found end of input"### ,
    );
}
852 | |
// A numeric offset always has to begin with a '+' or a '-'. Any other
// leading byte is rejected, and the error quotes the offending byte.
#[test ]
fn err_numeric_notsign() {
    insta::assert_snapshot!(
        Parser::new().parse_numeric(b"*" ).unwrap_err(),
        @r###"failed to parse sign in UTC numeric offset "*": expected '+' or '-' sign at start of UTC numeric offset, but found "*" instead"### ,
    );
}
861 | |
// The hours component must be at least two bytes. Here only a single byte
// follows the sign, so the parser reports running out of input before it
// ever looks at whether that byte is a digit.
#[test ]
fn err_numeric_hours_too_short() {
    insta::assert_snapshot!(
        Parser::new().parse_numeric(b"+a" ).unwrap_err(),
        @r###"failed to parse hours in UTC numeric offset "+a": expected two digit hour after sign, but found end of input"### ,
    );
}
870 | |
// The two bytes making up the hours component must both be ASCII digits.
// The error identifies the first byte that is not a digit.
#[test ]
fn err_numeric_hours_invalid_digits() {
    insta::assert_snapshot!(
        Parser::new().parse_numeric(b"+ab" ).unwrap_err(),
        @r###"failed to parse hours in UTC numeric offset "+ab": failed to parse "ab" as hours (a two digit integer): invalid digit, expected 0-9 but got a"### ,
    );
}
879 | |
// A syntactically valid hours component is still rejected when its value
// falls outside the permitted range (26 exceeds the 0..=25 limit).
#[test]
fn err_numeric_hours_out_of_range() {
    let input = b"-26";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse hours in UTC numeric offset "-26": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"###,
    );
}
888 | |
// With fewer than two bytes following the hour separator, there is not
// enough input to form the minutes component.
#[test]
fn err_numeric_minutes_too_short() {
    let input = b"+05:a";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse minutes in UTC numeric offset "+05:a": expected two digit minute after hours, but found end of input"###,
    );
}
897 | |
// Two bytes after the hour separator are not sufficient on their own: both
// of them must be ASCII digits for the minutes component to parse.
#[test]
fn err_numeric_minutes_invalid_digits() {
    let input = b"+05:ab";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse minutes in UTC numeric offset "+05:ab": failed to parse "ab" as minutes (a two digit integer): invalid digit, expected 0-9 but got a"###,
    );
}
906 | |
// A syntactically valid minutes component is still rejected when its value
// falls outside the permitted range (60 exceeds the 0..=59 limit).
#[test]
fn err_numeric_minutes_out_of_range() {
    let input = b"-05:60";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse minutes in UTC numeric offset "-05:60": minutes are not valid: parameter 'minutes' with value 60 is not in the required range of 0..=59"###,
    );
}
915 | |
// With fewer than two bytes following the minute separator, there is not
// enough input to form the seconds component.
#[test]
fn err_numeric_seconds_too_short() {
    let input = b"+05:30:a";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse seconds in UTC numeric offset "+05:30:a": expected two digit second after hours, but found end of input"###,
    );
}
924 | |
// Two bytes after the minute separator are not sufficient on their own:
// both of them must be ASCII digits for the seconds component to parse.
#[test]
fn err_numeric_seconds_invalid_digits() {
    let input = b"+05:30:ab";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse seconds in UTC numeric offset "+05:30:ab": failed to parse "ab" as seconds (a two digit integer): invalid digit, expected 0-9 but got a"###,
    );
}
933 | |
// A syntactically valid seconds component is still rejected when its value
// falls outside the permitted range (60 exceeds the 0..=59 limit).
#[test]
fn err_numeric_seconds_out_of_range() {
    let input = b"-05:30:60";
    let err = Parser::new().parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse seconds in UTC numeric offset "-05:30:60": time zone offset seconds are not valid: parameter 'seconds' with value 60 is not in the required range of 0..=59"###,
    );
}
942 | |
// Once a decimal separator ('.' or ',') appears after the seconds, at least
// one decimal digit has to follow it. A separator with nothing usable after
// it is an error.
#[test]
fn err_numeric_fraction_non_empty() {
    // The separator is the very last byte of the input.
    let err = Parser::new().parse_numeric(b"-05:30:44.").unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44.": found decimal after seconds component, but did not find any decimal digits after decimal"###,
    );
    let err = Parser::new().parse_numeric(b"-05:30:44,").unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44,": found decimal after seconds component, but did not find any decimal digits after decimal"###,
    );

    // The separator is followed by a byte that is not a digit.
    let err = Parser::new().parse_numeric(b"-05:30:44.a").unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44.a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
    );
    let err = Parser::new().parse_numeric(b"-05:30:44,a").unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse fractional nanoseconds in UTC numeric offset "-05:30:44,a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
    );

    // The same failure in the basic format, which has no ':' separators.
    let err = Parser::new().parse_numeric(b"-053044.a").unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse fractional nanoseconds in UTC numeric offset "-053044.a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
    );
    let err = Parser::new().parse_numeric(b"-053044,a").unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"failed to parse fractional nanoseconds in UTC numeric offset "-053044,a": found decimal after seconds component, but did not find any decimal digits after decimal"###,
    );
}
976 | |
// A special case: the input clearly asks for sub-minute precision, but that
// precision has been explicitly disabled on the parser. The dedicated error
// makes what would otherwise be a subtle failure mode explicit.
#[test]
fn err_numeric_subminute_disabled_but_desired() {
    let input = b"-05:59:32";
    let err = Parser::new().subminute(false).parse_numeric(input).unwrap_err();
    insta::assert_snapshot!(
        err,
        @r###"subminute precision for UTC numeric offset "-05:59:32" is not enabled in this context (must provide only integral minutes)"###,
    );
}
987 | |
// Another special case: Zulu parsing has been explicitly disabled on the
// parser, yet the input is a Zulu designator. Both the upper case and lower
// case spellings are rejected.
#[test]
fn err_zulu_disabled_but_desired() {
    let upper_err = Parser::new().zulu(false).parse(b"Z").unwrap_err();
    insta::assert_snapshot!(
        upper_err,
        @r###"found "Z" in "Z" where a numeric UTC offset was expected (this context does not permit the Zulu offset)"###,
    );

    let lower_err = Parser::new().zulu(false).parse(b"z").unwrap_err();
    insta::assert_snapshot!(
        lower_err,
        @r###"found "z" in "z" where a numeric UTC offset was expected (this context does not permit the Zulu offset)"###,
    );
}
1001 | |
// Once a `Numeric` has been parsed, converting it to an `Offset` is almost
// always infallible. The one exception is a fractional nanosecond part
// combined with maximal hours, minutes and seconds: a fraction just below
// half a second still yields `Offset::MAX`, while a fraction of exactly half
// a second is rounded to a value that is out of bounds.
#[test]
fn err_numeric_too_big_for_offset() {
    // Largest fraction that still converts successfully.
    let within_bounds = Numeric {
        sign: t::Sign::MAX_SELF,
        hours: ParsedOffsetHours::MAX_SELF,
        minutes: Some(ParsedOffsetMinutes::MAX_SELF),
        seconds: Some(ParsedOffsetSeconds::MAX_SELF),
        nanoseconds: Some(C(499_999_999).rinto()),
    };
    // One more nanosecond and the conversion fails.
    let out_of_bounds = Numeric {
        sign: t::Sign::MAX_SELF,
        hours: ParsedOffsetHours::MAX_SELF,
        minutes: Some(ParsedOffsetMinutes::MAX_SELF),
        seconds: Some(ParsedOffsetSeconds::MAX_SELF),
        nanoseconds: Some(C(500_000_000).rinto()),
    };

    let offset = within_bounds.to_offset().unwrap();
    assert_eq!(offset, Offset::MAX);

    let err = out_of_bounds.to_offset().unwrap_err();
    insta::assert_snapshot!(
        err,
        @"due to precision loss, UTC offset '+25:59:59.5' is rounded to a value that is out of bounds: parameter 'offset-seconds' with value 1 is not in the required range of -93599..=93599",
    );
}
1029 | |
// The mirror image of `err_numeric_too_big_for_offset`, exercised at the
// minimum boundary: a fraction just below half a second still yields
// `Offset::MIN`, while exactly half a second fails to convert.
#[test]
fn err_numeric_too_small_for_offset() {
    // Largest fraction that still converts successfully.
    let within_bounds = Numeric {
        sign: t::Sign::MIN_SELF,
        hours: ParsedOffsetHours::MAX_SELF,
        minutes: Some(ParsedOffsetMinutes::MAX_SELF),
        seconds: Some(ParsedOffsetSeconds::MAX_SELF),
        nanoseconds: Some(C(499_999_999).rinto()),
    };
    // One more nanosecond and the conversion fails.
    let out_of_bounds = Numeric {
        sign: t::Sign::MIN_SELF,
        hours: ParsedOffsetHours::MAX_SELF,
        minutes: Some(ParsedOffsetMinutes::MAX_SELF),
        seconds: Some(ParsedOffsetSeconds::MAX_SELF),
        nanoseconds: Some(C(500_000_000).rinto()),
    };

    let offset = within_bounds.to_offset().unwrap();
    assert_eq!(offset, Offset::MIN);

    let err = out_of_bounds.to_offset().unwrap_err();
    insta::assert_snapshot!(
        err,
        @"due to precision loss, UTC offset '-25:59:59.5' is rounded to a value that is out of bounds: parameter 'offset-seconds' with value 1 is not in the required range of -93599..=93599",
    );
}
1054 | } |
1055 | |