1// This is a part of Chrono.
2// See README.md and LICENSE.txt for details.
3
4/*!
5 * Various scanning routines for the parser.
6 */
7
8use super::{ParseResult, INVALID, OUT_OF_RANGE, TOO_SHORT};
9use crate::Weekday;
10
11/// Tries to parse the non-negative number from `min` to `max` digits.
12///
13/// The absence of digits at all is an unconditional error.
14/// More than `max` digits are consumed up to the first `max` digits.
15/// Any number that does not fit in `i64` is an error.
16#[inline]
17pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
18 assert!(min <= max);
19
20 // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
21 // the first non-numeric byte, which may be another ascii character or beginning of multi-byte
22 // UTF-8 character.
23 let bytes = s.as_bytes();
24 if bytes.len() < min {
25 return Err(TOO_SHORT);
26 }
27
28 let mut n = 0i64;
29 for (i, c) in bytes.iter().take(max).cloned().enumerate() {
30 // cloned() = copied()
31 if !c.is_ascii_digit() {
32 if i < min {
33 return Err(INVALID);
34 } else {
35 return Ok((&s[i..], n));
36 }
37 }
38
39 n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
40 Some(n) => n,
41 None => return Err(OUT_OF_RANGE),
42 };
43 }
44
45 Ok((&s[core::cmp::min(max, bytes.len())..], n))
46}
47
48/// Tries to consume at least one digits as a fractional second.
49/// Returns the number of whole nanoseconds (0--999,999,999).
50pub(super) fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
51 // record the number of digits consumed for later scaling.
52 let origlen: usize = s.len();
53 let (s: &str, v: i64) = number(s, min:1, max:9)?;
54 let consumed: usize = origlen - s.len();
55
56 // scale the number accordingly.
57 static SCALE: [i64; 10] =
58 [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
59 let v: i64 = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
60
61 // if there are more than 9 digits, skip next digits.
62 let s: &str = s.trim_start_matches(|c: char| c.is_ascii_digit());
63
64 Ok((s, v))
65}
66
67/// Tries to consume a fixed number of digits as a fractional second.
68/// Returns the number of whole nanoseconds (0--999,999,999).
69pub(super) fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
70 // record the number of digits consumed for later scaling.
71 let (s: &str, v: i64) = number(s, min:digits, max:digits)?;
72
73 // scale the number accordingly.
74 static SCALE: [i64; 10] =
75 [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
76 let v: i64 = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
77
78 Ok((s, v))
79}
80
81/// Tries to parse the month index (0 through 11) with the first three ASCII letters.
82pub(super) fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
83 if s.len() < 3 {
84 return Err(TOO_SHORT);
85 }
86 let buf: &[u8] = s.as_bytes();
87 let month0: u8 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
88 (b'j', b'a', b'n') => 0,
89 (b'f', b'e', b'b') => 1,
90 (b'm', b'a', b'r') => 2,
91 (b'a', b'p', b'r') => 3,
92 (b'm', b'a', b'y') => 4,
93 (b'j', b'u', b'n') => 5,
94 (b'j', b'u', b'l') => 6,
95 (b'a', b'u', b'g') => 7,
96 (b's', b'e', b'p') => 8,
97 (b'o', b'c', b't') => 9,
98 (b'n', b'o', b'v') => 10,
99 (b'd', b'e', b'c') => 11,
100 _ => return Err(INVALID),
101 };
102 Ok((&s[3..], month0))
103}
104
105/// Tries to parse the weekday with the first three ASCII letters.
106pub(super) fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
107 if s.len() < 3 {
108 return Err(TOO_SHORT);
109 }
110 let buf: &[u8] = s.as_bytes();
111 let weekday: Weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
112 (b'm', b'o', b'n') => Weekday::Mon,
113 (b't', b'u', b'e') => Weekday::Tue,
114 (b'w', b'e', b'd') => Weekday::Wed,
115 (b't', b'h', b'u') => Weekday::Thu,
116 (b'f', b'r', b'i') => Weekday::Fri,
117 (b's', b'a', b't') => Weekday::Sat,
118 (b's', b'u', b'n') => Weekday::Sun,
119 _ => return Err(INVALID),
120 };
121 Ok((&s[3..], weekday))
122}
123
124/// Tries to parse the month index (0 through 11) with short or long month names.
125/// It prefers long month names to short month names when both are possible.
126pub(super) fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
127 // lowercased month names, minus first three chars
128 static LONG_MONTH_SUFFIXES: [&[u8]; 12] = [
129 b"uary", b"ruary", b"ch", b"il", b"", b"e", b"y", b"ust", b"tember", b"ober", b"ember",
130 b"ember",
131 ];
132
133 let (mut s: &str, month0: u8) = short_month0(s)?;
134
135 // tries to consume the suffix if possible
136 let suffix: &[u8] = LONG_MONTH_SUFFIXES[month0 as usize];
137 if s.len() >= suffix.len() && s.as_bytes()[..suffix.len()].eq_ignore_ascii_case(suffix) {
138 s = &s[suffix.len()..];
139 }
140
141 Ok((s, month0))
142}
143
144/// Tries to parse the weekday with short or long weekday names.
145/// It prefers long weekday names to short weekday names when both are possible.
146pub(super) fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
147 // lowercased weekday names, minus first three chars
148 static LONG_WEEKDAY_SUFFIXES: [&[u8]; 7] =
149 [b"day", b"sday", b"nesday", b"rsday", b"day", b"urday", b"day"];
150
151 let (mut s: &str, weekday: Weekday) = short_weekday(s)?;
152
153 // tries to consume the suffix if possible
154 let suffix: &[u8] = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
155 if s.len() >= suffix.len() && s.as_bytes()[..suffix.len()].eq_ignore_ascii_case(suffix) {
156 s = &s[suffix.len()..];
157 }
158
159 Ok((s, weekday))
160}
161
162/// Tries to consume exactly one given character.
163pub(super) fn char(s: &str, c1: u8) -> ParseResult<&str> {
164 match s.as_bytes().first() {
165 Some(&c: u8) if c == c1 => Ok(&s[1..]),
166 Some(_) => Err(INVALID),
167 None => Err(TOO_SHORT),
168 }
169}
170
171/// Tries to consume one or more whitespace.
172pub(super) fn space(s: &str) -> ParseResult<&str> {
173 let s_: &str = s.trim_start();
174 if s_.len() < s.len() {
175 Ok(s_)
176 } else if s.is_empty() {
177 Err(TOO_SHORT)
178 } else {
179 Err(INVALID)
180 }
181}
182
183/// Consumes any number (including zero) of colon or spaces.
184pub(crate) fn colon_or_space(s: &str) -> ParseResult<&str> {
185 Ok(s.trim_start_matches(|c: char| c == ':' || c.is_whitespace()))
186}
187
188/// Parse a timezone from `s` and return the offset in seconds.
189///
190/// The `consume_colon` function is used to parse a mandatory or optional `:`
191/// separator between hours offset and minutes offset.
192///
193/// The `allow_missing_minutes` flag allows the timezone minutes offset to be
194/// missing from `s`.
195///
196/// The `allow_tz_minus_sign` flag allows the timezone offset negative character
197/// to also be `−` MINUS SIGN (U+2212) in addition to the typical
198/// ASCII-compatible `-` HYPHEN-MINUS (U+2D).
199/// This is part of [RFC 3339 & ISO 8601].
200///
201/// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC
202pub(crate) fn timezone_offset<F>(
203 mut s: &str,
204 mut consume_colon: F,
205 allow_zulu: bool,
206 allow_missing_minutes: bool,
207 allow_tz_minus_sign: bool,
208) -> ParseResult<(&str, i32)>
209where
210 F: FnMut(&str) -> ParseResult<&str>,
211{
212 if allow_zulu {
213 if let Some(&b'Z' | &b'z') = s.as_bytes().first() {
214 return Ok((&s[1..], 0));
215 }
216 }
217
218 const fn digits(s: &str) -> ParseResult<(u8, u8)> {
219 let b = s.as_bytes();
220 if b.len() < 2 {
221 Err(TOO_SHORT)
222 } else {
223 Ok((b[0], b[1]))
224 }
225 }
226 let negative = match s.chars().next() {
227 Some('+') => {
228 // PLUS SIGN (U+2B)
229 s = &s['+'.len_utf8()..];
230
231 false
232 }
233 Some('-') => {
234 // HYPHEN-MINUS (U+2D)
235 s = &s['-'.len_utf8()..];
236
237 true
238 }
239 Some('−') => {
240 // MINUS SIGN (U+2212)
241 if !allow_tz_minus_sign {
242 return Err(INVALID);
243 }
244 s = &s['−'.len_utf8()..];
245
246 true
247 }
248 Some(_) => return Err(INVALID),
249 None => return Err(TOO_SHORT),
250 };
251
252 // hours (00--99)
253 let hours = match digits(s)? {
254 (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
255 _ => return Err(INVALID),
256 };
257 s = &s[2..];
258
259 // colons (and possibly other separators)
260 s = consume_colon(s)?;
261
262 // minutes (00--59)
263 // if the next two items are digits then we have to add minutes
264 let minutes = if let Ok(ds) = digits(s) {
265 match ds {
266 (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
267 (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE),
268 _ => return Err(INVALID),
269 }
270 } else if allow_missing_minutes {
271 0
272 } else {
273 return Err(TOO_SHORT);
274 };
275 s = match s.len() {
276 len if len >= 2 => &s[2..],
277 0 => s,
278 _ => return Err(TOO_SHORT),
279 };
280
281 let seconds = hours * 3600 + minutes * 60;
282 Ok((s, if negative { -seconds } else { seconds }))
283}
284
285/// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
286/// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
287/// See [RFC 2822 Section 4.3].
288///
289/// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3
290pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, i32)> {
291 // tries to parse legacy time zone names
292 let upto = s.as_bytes().iter().position(|&c| !c.is_ascii_alphabetic()).unwrap_or(s.len());
293 if upto > 0 {
294 let name = &s.as_bytes()[..upto];
295 let s = &s[upto..];
296 let offset_hours = |o| Ok((s, o * 3600));
297 // RFC 2822 requires support for some named North America timezones, a small subset of all
298 // named timezones.
299 if name.eq_ignore_ascii_case(b"gmt")
300 || name.eq_ignore_ascii_case(b"ut")
301 || name.eq_ignore_ascii_case(b"z")
302 {
303 return offset_hours(0);
304 } else if name.eq_ignore_ascii_case(b"edt") {
305 return offset_hours(-4);
306 } else if name.eq_ignore_ascii_case(b"est") || name.eq_ignore_ascii_case(b"cdt") {
307 return offset_hours(-5);
308 } else if name.eq_ignore_ascii_case(b"cst") || name.eq_ignore_ascii_case(b"mdt") {
309 return offset_hours(-6);
310 } else if name.eq_ignore_ascii_case(b"mst") || name.eq_ignore_ascii_case(b"pdt") {
311 return offset_hours(-7);
312 } else if name.eq_ignore_ascii_case(b"pst") {
313 return offset_hours(-8);
314 } else if name.len() == 1 {
315 if let b'a'..=b'i' | b'k'..=b'y' | b'A'..=b'I' | b'K'..=b'Y' = name[0] {
316 // recommended by RFC 2822: consume but treat it as -0000
317 return Ok((s, 0));
318 }
319 }
320 Err(INVALID)
321 } else {
322 timezone_offset(s, |s| Ok(s), false, false, false)
323 }
324}
325
326/// Tries to consume an RFC2822 comment including preceding ` `.
327///
328/// Returns the remaining string after the closing parenthesis.
329pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> {
330 use CommentState::*;
331
332 let s: &str = s.trim_start();
333
334 let mut state: CommentState = Start;
335 for (i: usize, c: u8) in s.bytes().enumerate() {
336 state = match (state, c) {
337 (Start, b'(') => Next(1),
338 (Next(1), b')') => return Ok((&s[i + 1..], ())),
339 (Next(depth: usize), b'\\') => Escape(depth),
340 (Next(depth: usize), b'(') => Next(depth + 1),
341 (Next(depth: usize), b')') => Next(depth - 1),
342 (Next(depth: usize), _) | (Escape(depth: usize), _) => Next(depth),
343 _ => return Err(INVALID),
344 };
345 }
346
347 Err(TOO_SHORT)
348}
349
/// Scanner state used by `comment_2822` while walking a parenthesized comment.
enum CommentState {
    /// Nothing has been consumed yet; the next byte must be the opening `(`.
    Start,
    /// Inside a comment at the given nesting depth (1 is the outermost level).
    Next(usize),
    /// The previous byte was a backslash at the given depth; the next byte is taken
    /// literally and does not change the depth.
    Escape(usize),
}
355
// Unit tests for the scanning helpers in this file.
#[cfg(test)]
mod tests {
    use super::{
        comment_2822, nanosecond, nanosecond_fixed, short_or_long_month0, short_or_long_weekday,
        timezone_offset_2822,
    };
    use crate::format::{INVALID, TOO_SHORT};
    use crate::Weekday;

    // Table-driven check of `comment_2822`: each entry pairs an input with the expected
    // remainder (or error).
    #[test]
    fn test_rfc2822_comments() {
        let testdata = [
            // Missing or unterminated comments.
            ("", Err(TOO_SHORT)),
            (" ", Err(TOO_SHORT)),
            ("x", Err(INVALID)),
            ("(", Err(TOO_SHORT)),
            // Leading whitespace is skipped; text after the comment is returned untouched.
            ("()", Ok("")),
            (" \r\n\t()", Ok("")),
            ("() ", Ok(" ")),
            ("()z", Ok("z")),
            // Content and nesting.
            ("(x)", Ok("")),
            ("(())", Ok("")),
            ("((()))", Ok("")),
            ("(x(x(x)x)x)", Ok("")),
            ("( x ( x ( x ) x ) x )", Ok("")),
            // Escapes: an escaped `)` does not close the comment.
            (r"(\)", Err(TOO_SHORT)),
            (r"(\()", Ok("")),
            (r"(\))", Ok("")),
            (r"(\\)", Ok("")),
            // Sibling nested comments.
            ("(()())", Ok("")),
            ("( x ( x ) x ( x ) x )", Ok("")),
        ];

        for (test_in, expected) in testdata.iter() {
            // Only the remainder is compared; the unit payload is dropped.
            let actual = comment_2822(test_in).map(|(s, _)| s);
            assert_eq!(
                *expected, actual,
                "{:?} expected to produce {:?}, but produced {:?}.",
                test_in, expected, actual
            );
        }
    }

    // Named zones match regardless of case; numeric offsets fall through to
    // `timezone_offset`; unknown multi-letter names are rejected.
    #[test]
    fn test_timezone_offset_2822() {
        assert_eq!(timezone_offset_2822("cSt").unwrap(), ("", -21600));
        assert_eq!(timezone_offset_2822("pSt").unwrap(), ("", -28800));
        assert_eq!(timezone_offset_2822("mSt").unwrap(), ("", -25200));
        assert_eq!(timezone_offset_2822("-1551").unwrap(), ("", -57060));
        assert_eq!(timezone_offset_2822("Gp"), Err(INVALID));
    }

    // Abbreviations match regardless of case; a trailing non-ASCII character is left in the
    // remainder without panicking.
    #[test]
    fn test_short_or_long_month0() {
        assert_eq!(short_or_long_month0("JUn").unwrap(), ("", 5));
        assert_eq!(short_or_long_month0("mAy").unwrap(), ("", 4));
        assert_eq!(short_or_long_month0("AuG").unwrap(), ("", 7));
        assert_eq!(short_or_long_month0("Aprâ").unwrap(), ("â", 3));
        assert_eq!(short_or_long_month0("JUl").unwrap(), ("", 6));
        assert_eq!(short_or_long_month0("mAr").unwrap(), ("", 2));
        assert_eq!(short_or_long_month0("Jan").unwrap(), ("", 0));
    }

    // A partial long-name suffix ("u" of "urday") is not consumed.
    #[test]
    fn test_short_or_long_weekday() {
        assert_eq!(short_or_long_weekday("sAtu").unwrap(), ("u", Weekday::Sat));
        assert_eq!(short_or_long_weekday("thu").unwrap(), ("", Weekday::Thu));
    }

    // Zero requested digits succeeds on empty input; one requested digit does not.
    #[test]
    fn test_nanosecond_fixed() {
        assert_eq!(nanosecond_fixed("", 0usize).unwrap(), ("", 0));
        assert!(nanosecond_fixed("", 1usize).is_err());
    }

    // A single digit is scaled to tenths of a second; a following non-ASCII character is
    // left in the remainder.
    #[test]
    fn test_nanosecond() {
        assert_eq!(nanosecond("2Ù").unwrap(), ("Ù", 200000000));
        assert_eq!(nanosecond("8").unwrap(), ("", 800000000));
    }
}
437