1// This is a part of Chrono.
2// See README.md and LICENSE.txt for details.
3
4/*!
5 * Various scanning routines for the parser.
6 */
7
8#![allow(deprecated)]
9
10use super::{ParseResult, INVALID, OUT_OF_RANGE, TOO_SHORT};
11use crate::Weekday;
12
/// Returns true when `s` equals `pattern`, ignoring ASCII case in `s`.
///
/// Only the bytes of `s` are folded to lower case, so `pattern` must already be
/// lower case: an upper-case letter in `pattern` never matches anything.
/// Slices of different lengths are never equal.
fn equals(s: &[u8], pattern: &str) -> bool {
    let pattern = pattern.as_bytes();
    // Checking the lengths first lets `zip` compare the slices pairwise without
    // silently ignoring a longer tail on either side.
    s.len() == pattern.len()
        && s.iter().zip(pattern).all(|(&x, &y)| x.to_ascii_lowercase() == y)
}
30
31/// Tries to parse the non-negative number from `min` to `max` digits.
32///
33/// The absence of digits at all is an unconditional error.
34/// More than `max` digits are consumed up to the first `max` digits.
35/// Any number that does not fit in `i64` is an error.
36#[inline]
37pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
38 assert!(min <= max);
39
40 // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
41 // the first non-numeric byte, which may be another ascii character or beginning of multi-byte
42 // UTF-8 character.
43 let bytes = s.as_bytes();
44 if bytes.len() < min {
45 return Err(TOO_SHORT);
46 }
47
48 let mut n = 0i64;
49 for (i, c) in bytes.iter().take(max).cloned().enumerate() {
50 // cloned() = copied()
51 if !c.is_ascii_digit() {
52 if i < min {
53 return Err(INVALID);
54 } else {
55 return Ok((&s[i..], n));
56 }
57 }
58
59 n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
60 Some(n) => n,
61 None => return Err(OUT_OF_RANGE),
62 };
63 }
64
65 Ok((&s[core::cmp::min(max, bytes.len())..], n))
66}
67
68/// Tries to consume at least one digits as a fractional second.
69/// Returns the number of whole nanoseconds (0--999,999,999).
70pub(super) fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
71 // record the number of digits consumed for later scaling.
72 let origlen: usize = s.len();
73 let (s: &str, v: i64) = number(s, min:1, max:9)?;
74 let consumed: usize = origlen - s.len();
75
76 // scale the number accordingly.
77 static SCALE: [i64; 10] =
78 [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
79 let v: i64 = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
80
81 // if there are more than 9 digits, skip next digits.
82 let s: &str = s.trim_left_matches(|c: char| c.is_ascii_digit());
83
84 Ok((s, v))
85}
86
87/// Tries to consume a fixed number of digits as a fractional second.
88/// Returns the number of whole nanoseconds (0--999,999,999).
89pub(super) fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
90 // record the number of digits consumed for later scaling.
91 let (s: &str, v: i64) = number(s, min:digits, max:digits)?;
92
93 // scale the number accordingly.
94 static SCALE: [i64; 10] =
95 [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1];
96 let v: i64 = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
97
98 Ok((s, v))
99}
100
101/// Tries to parse the month index (0 through 11) with the first three ASCII letters.
102pub(super) fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
103 if s.len() < 3 {
104 return Err(TOO_SHORT);
105 }
106 let buf: &[u8] = s.as_bytes();
107 let month0: u8 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
108 (b'j', b'a', b'n') => 0,
109 (b'f', b'e', b'b') => 1,
110 (b'm', b'a', b'r') => 2,
111 (b'a', b'p', b'r') => 3,
112 (b'm', b'a', b'y') => 4,
113 (b'j', b'u', b'n') => 5,
114 (b'j', b'u', b'l') => 6,
115 (b'a', b'u', b'g') => 7,
116 (b's', b'e', b'p') => 8,
117 (b'o', b'c', b't') => 9,
118 (b'n', b'o', b'v') => 10,
119 (b'd', b'e', b'c') => 11,
120 _ => return Err(INVALID),
121 };
122 Ok((&s[3..], month0))
123}
124
125/// Tries to parse the weekday with the first three ASCII letters.
126pub(super) fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
127 if s.len() < 3 {
128 return Err(TOO_SHORT);
129 }
130 let buf: &[u8] = s.as_bytes();
131 let weekday: Weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
132 (b'm', b'o', b'n') => Weekday::Mon,
133 (b't', b'u', b'e') => Weekday::Tue,
134 (b'w', b'e', b'd') => Weekday::Wed,
135 (b't', b'h', b'u') => Weekday::Thu,
136 (b'f', b'r', b'i') => Weekday::Fri,
137 (b's', b'a', b't') => Weekday::Sat,
138 (b's', b'u', b'n') => Weekday::Sun,
139 _ => return Err(INVALID),
140 };
141 Ok((&s[3..], weekday))
142}
143
144/// Tries to parse the month index (0 through 11) with short or long month names.
145/// It prefers long month names to short month names when both are possible.
146pub(super) fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
147 // lowercased month names, minus first three chars
148 static LONG_MONTH_SUFFIXES: [&str; 12] =
149 ["uary", "ruary", "ch", "il", "", "e", "y", "ust", "tember", "ober", "ember", "ember"];
150
151 let (mut s: &str, month0: u8) = short_month0(s)?;
152
153 // tries to consume the suffix if possible
154 let suffix: &str = LONG_MONTH_SUFFIXES[month0 as usize];
155 if s.len() >= suffix.len() && equals(&s.as_bytes()[..suffix.len()], pattern:suffix) {
156 s = &s[suffix.len()..];
157 }
158
159 Ok((s, month0))
160}
161
162/// Tries to parse the weekday with short or long weekday names.
163/// It prefers long weekday names to short weekday names when both are possible.
164pub(super) fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
165 // lowercased weekday names, minus first three chars
166 static LONG_WEEKDAY_SUFFIXES: [&str; 7] =
167 ["day", "sday", "nesday", "rsday", "day", "urday", "day"];
168
169 let (mut s: &str, weekday: Weekday) = short_weekday(s)?;
170
171 // tries to consume the suffix if possible
172 let suffix: &str = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
173 if s.len() >= suffix.len() && equals(&s.as_bytes()[..suffix.len()], pattern:suffix) {
174 s = &s[suffix.len()..];
175 }
176
177 Ok((s, weekday))
178}
179
180/// Tries to consume exactly one given character.
181pub(super) fn char(s: &str, c1: u8) -> ParseResult<&str> {
182 match s.as_bytes().first() {
183 Some(&c: u8) if c == c1 => Ok(&s[1..]),
184 Some(_) => Err(INVALID),
185 None => Err(TOO_SHORT),
186 }
187}
188
189/// Tries to consume one or more whitespace.
190pub(super) fn space(s: &str) -> ParseResult<&str> {
191 let s_: &str = s.trim_left();
192 if s_.len() < s.len() {
193 Ok(s_)
194 } else if s.is_empty() {
195 Err(TOO_SHORT)
196 } else {
197 Err(INVALID)
198 }
199}
200
201/// Consumes any number (including zero) of colon or spaces.
202pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> {
203 Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace()))
204}
205
206/// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible.
207///
208/// The additional `colon` may be used to parse a mandatory or optional `:`
209/// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
210pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)>
211where
212 F: FnMut(&str) -> ParseResult<&str>,
213{
214 timezone_offset_internal(s, consume_colon, allow_missing_minutes:false)
215}
216
217fn timezone_offset_internal<F>(
218 mut s: &str,
219 mut consume_colon: F,
220 allow_missing_minutes: bool,
221) -> ParseResult<(&str, i32)>
222where
223 F: FnMut(&str) -> ParseResult<&str>,
224{
225 const fn digits(s: &str) -> ParseResult<(u8, u8)> {
226 let b = s.as_bytes();
227 if b.len() < 2 {
228 Err(TOO_SHORT)
229 } else {
230 Ok((b[0], b[1]))
231 }
232 }
233 let negative = match s.as_bytes().first() {
234 Some(&b'+') => false,
235 Some(&b'-') => true,
236 Some(_) => return Err(INVALID),
237 None => return Err(TOO_SHORT),
238 };
239 s = &s[1..];
240
241 // hours (00--99)
242 let hours = match digits(s)? {
243 (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
244 _ => return Err(INVALID),
245 };
246 s = &s[2..];
247
248 // colons (and possibly other separators)
249 s = consume_colon(s)?;
250
251 // minutes (00--59)
252 // if the next two items are digits then we have to add minutes
253 let minutes = if let Ok(ds) = digits(s) {
254 match ds {
255 (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
256 (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE),
257 _ => return Err(INVALID),
258 }
259 } else if allow_missing_minutes {
260 0
261 } else {
262 return Err(TOO_SHORT);
263 };
264 s = match s.len() {
265 len if len >= 2 => &s[2..],
266 len if len == 0 => s,
267 _ => return Err(TOO_SHORT),
268 };
269
270 let seconds = hours * 3600 + minutes * 60;
271 Ok((s, if negative { -seconds } else { seconds }))
272}
273
274/// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as `+00:00`.
275pub(super) fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
276where
277 F: FnMut(&str) -> ParseResult<&str>,
278{
279 let bytes: &[u8] = s.as_bytes();
280 match bytes.first() {
281 Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
282 Some(&b'u') | Some(&b'U') => {
283 if bytes.len() >= 3 {
284 let (b: u8, c: u8) = (bytes[1], bytes[2]);
285 match (b | 32, c | 32) {
286 (b't', b'c') => Ok((&s[3..], 0)),
287 _ => Err(INVALID),
288 }
289 } else {
290 Err(INVALID)
291 }
292 }
293 _ => timezone_offset(s, consume_colon:colon),
294 }
295}
296
297/// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as
298/// `+00:00`, and allows missing minutes entirely.
299pub(super) fn timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
300where
301 F: FnMut(&str) -> ParseResult<&str>,
302{
303 match s.as_bytes().first() {
304 Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
305 _ => timezone_offset_internal(s, consume_colon:colon, allow_missing_minutes:true),
306 }
307}
308
309/// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
310/// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
311/// See [RFC 2822 Section 4.3].
312///
313/// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3
314pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> {
315 // tries to parse legacy time zone names
316 let upto = s.as_bytes().iter().position(|&c| !c.is_ascii_alphabetic()).unwrap_or(s.len());
317 if upto > 0 {
318 let name = &s.as_bytes()[..upto];
319 let s = &s[upto..];
320 let offset_hours = |o| Ok((s, Some(o * 3600)));
321 if equals(name, "gmt") || equals(name, "ut") {
322 offset_hours(0)
323 } else if equals(name, "edt") {
324 offset_hours(-4)
325 } else if equals(name, "est") || equals(name, "cdt") {
326 offset_hours(-5)
327 } else if equals(name, "cst") || equals(name, "mdt") {
328 offset_hours(-6)
329 } else if equals(name, "mst") || equals(name, "pdt") {
330 offset_hours(-7)
331 } else if equals(name, "pst") {
332 offset_hours(-8)
333 } else if name.len() == 1 {
334 match name[0] {
335 // recommended by RFC 2822: consume but treat it as -0000
336 b'a'..=b'i' | b'k'..=b'z' | b'A'..=b'I' | b'K'..=b'Z' => offset_hours(0),
337 _ => Ok((s, None)),
338 }
339 } else {
340 Ok((s, None))
341 }
342 } else {
343 let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
344 Ok((s_, Some(offset)))
345 }
346}
347
348/// Tries to consume everything until next whitespace-like symbol.
349/// Does not provide any offset information from the consumed data.
350pub(super) fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> {
351 Ok((s.trim_left_matches(|c: char| !c.is_whitespace()), ()))
352}
353
354/// Tries to consume an RFC2822 comment including preceding ` `.
355///
356/// Returns the remaining string after the closing parenthesis.
357pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> {
358 use CommentState::*;
359
360 let s: &str = s.trim_start();
361
362 let mut state: CommentState = Start;
363 for (i: usize, c: u8) in s.bytes().enumerate() {
364 state = match (state, c) {
365 (Start, b'(') => Next(1),
366 (Next(1), b')') => return Ok((&s[i + 1..], ())),
367 (Next(depth: usize), b'\\') => Escape(depth),
368 (Next(depth: usize), b'(') => Next(depth + 1),
369 (Next(depth: usize), b')') => Next(depth - 1),
370 (Next(depth: usize), _) | (Escape(depth: usize), _) => Next(depth),
371 _ => return Err(INVALID),
372 };
373 }
374
375 Err(TOO_SHORT)
376}
377
378enum CommentState {
379 Start,
380 Next(usize),
381 Escape(usize),
382}
383
#[cfg(test)]
#[test]
fn test_rfc2822_comments() {
    // Each case pairs an input with the expected remainder after the comment, or
    // with the expected error.
    let cases: &[(&str, ParseResult<&str>)] = &[
        ("", Err(TOO_SHORT)),
        (" ", Err(TOO_SHORT)),
        ("x", Err(INVALID)),
        ("(", Err(TOO_SHORT)),
        ("()", Ok("")),
        (" \r\n\t()", Ok("")),
        ("() ", Ok(" ")),
        ("()z", Ok("z")),
        ("(x)", Ok("")),
        ("(())", Ok("")),
        ("((()))", Ok("")),
        ("(x(x(x)x)x)", Ok("")),
        ("( x ( x ( x ) x ) x )", Ok("")),
        (r"(\)", Err(TOO_SHORT)),
        (r"(\()", Ok("")),
        (r"(\))", Ok("")),
        (r"(\\)", Ok("")),
        ("(()())", Ok("")),
        ("( x ( x ) x ( x ) x )", Ok("")),
    ];

    for (input, want) in cases {
        let got = comment_2822(input).map(|(rest, ())| rest);
        assert_eq!(
            *want, got,
            "{:?} expected to produce {:?}, but produced {:?}.",
            input, want, got
        );
    }
}
418