1 | // This is a part of Chrono. |
2 | // See README.md and LICENSE.txt for details. |
3 | |
4 | /*! |
5 | * Various scanning routines for the parser. |
6 | */ |
7 | |
8 | #![allow (deprecated)] |
9 | |
10 | use super::{ParseResult, INVALID, OUT_OF_RANGE, TOO_SHORT}; |
11 | use crate::Weekday; |
12 | |
/// Returns true when `s` equals `pattern`, ignoring ASCII case in `s`.
///
/// Only the bytes of `s` are lowercased before comparing, so `pattern` must already be
/// lower case; an upper-case letter in `pattern` never matches.
fn equals(s: &[u8], pattern: &str) -> bool {
    let pattern = pattern.as_bytes();
    // Different lengths can never be equal; checking first lets `zip` cover every byte.
    s.len() == pattern.len()
        && s.iter().zip(pattern).all(|(&byte, &expected)| byte.to_ascii_lowercase() == expected)
}
30 | |
31 | /// Tries to parse the non-negative number from `min` to `max` digits. |
32 | /// |
33 | /// The absence of digits at all is an unconditional error. |
34 | /// More than `max` digits are consumed up to the first `max` digits. |
35 | /// Any number that does not fit in `i64` is an error. |
36 | #[inline ] |
37 | pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> { |
38 | assert!(min <= max); |
39 | |
40 | // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on |
41 | // the first non-numeric byte, which may be another ascii character or beginning of multi-byte |
42 | // UTF-8 character. |
43 | let bytes = s.as_bytes(); |
44 | if bytes.len() < min { |
45 | return Err(TOO_SHORT); |
46 | } |
47 | |
48 | let mut n = 0i64; |
49 | for (i, c) in bytes.iter().take(max).cloned().enumerate() { |
50 | // cloned() = copied() |
51 | if !c.is_ascii_digit() { |
52 | if i < min { |
53 | return Err(INVALID); |
54 | } else { |
55 | return Ok((&s[i..], n)); |
56 | } |
57 | } |
58 | |
59 | n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0' ) as i64)) { |
60 | Some(n) => n, |
61 | None => return Err(OUT_OF_RANGE), |
62 | }; |
63 | } |
64 | |
65 | Ok((&s[core::cmp::min(max, bytes.len())..], n)) |
66 | } |
67 | |
68 | /// Tries to consume at least one digits as a fractional second. |
69 | /// Returns the number of whole nanoseconds (0--999,999,999). |
70 | pub(super) fn nanosecond(s: &str) -> ParseResult<(&str, i64)> { |
71 | // record the number of digits consumed for later scaling. |
72 | let origlen: usize = s.len(); |
73 | let (s: &str, v: i64) = number(s, min:1, max:9)?; |
74 | let consumed: usize = origlen - s.len(); |
75 | |
76 | // scale the number accordingly. |
77 | static SCALE: [i64; 10] = |
78 | [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1]; |
79 | let v: i64 = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?; |
80 | |
81 | // if there are more than 9 digits, skip next digits. |
82 | let s: &str = s.trim_left_matches(|c: char| c.is_ascii_digit()); |
83 | |
84 | Ok((s, v)) |
85 | } |
86 | |
87 | /// Tries to consume a fixed number of digits as a fractional second. |
88 | /// Returns the number of whole nanoseconds (0--999,999,999). |
89 | pub(super) fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> { |
90 | // record the number of digits consumed for later scaling. |
91 | let (s: &str, v: i64) = number(s, min:digits, max:digits)?; |
92 | |
93 | // scale the number accordingly. |
94 | static SCALE: [i64; 10] = |
95 | [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, 1_000, 100, 10, 1]; |
96 | let v: i64 = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?; |
97 | |
98 | Ok((s, v)) |
99 | } |
100 | |
101 | /// Tries to parse the month index (0 through 11) with the first three ASCII letters. |
102 | pub(super) fn short_month0(s: &str) -> ParseResult<(&str, u8)> { |
103 | if s.len() < 3 { |
104 | return Err(TOO_SHORT); |
105 | } |
106 | let buf: &[u8] = s.as_bytes(); |
107 | let month0: u8 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) { |
108 | (b'j' , b'a' , b'n' ) => 0, |
109 | (b'f' , b'e' , b'b' ) => 1, |
110 | (b'm' , b'a' , b'r' ) => 2, |
111 | (b'a' , b'p' , b'r' ) => 3, |
112 | (b'm' , b'a' , b'y' ) => 4, |
113 | (b'j' , b'u' , b'n' ) => 5, |
114 | (b'j' , b'u' , b'l' ) => 6, |
115 | (b'a' , b'u' , b'g' ) => 7, |
116 | (b's' , b'e' , b'p' ) => 8, |
117 | (b'o' , b'c' , b't' ) => 9, |
118 | (b'n' , b'o' , b'v' ) => 10, |
119 | (b'd' , b'e' , b'c' ) => 11, |
120 | _ => return Err(INVALID), |
121 | }; |
122 | Ok((&s[3..], month0)) |
123 | } |
124 | |
125 | /// Tries to parse the weekday with the first three ASCII letters. |
126 | pub(super) fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> { |
127 | if s.len() < 3 { |
128 | return Err(TOO_SHORT); |
129 | } |
130 | let buf: &[u8] = s.as_bytes(); |
131 | let weekday: Weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) { |
132 | (b'm' , b'o' , b'n' ) => Weekday::Mon, |
133 | (b't' , b'u' , b'e' ) => Weekday::Tue, |
134 | (b'w' , b'e' , b'd' ) => Weekday::Wed, |
135 | (b't' , b'h' , b'u' ) => Weekday::Thu, |
136 | (b'f' , b'r' , b'i' ) => Weekday::Fri, |
137 | (b's' , b'a' , b't' ) => Weekday::Sat, |
138 | (b's' , b'u' , b'n' ) => Weekday::Sun, |
139 | _ => return Err(INVALID), |
140 | }; |
141 | Ok((&s[3..], weekday)) |
142 | } |
143 | |
144 | /// Tries to parse the month index (0 through 11) with short or long month names. |
145 | /// It prefers long month names to short month names when both are possible. |
146 | pub(super) fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> { |
147 | // lowercased month names, minus first three chars |
148 | static LONG_MONTH_SUFFIXES: [&str; 12] = |
149 | ["uary" , "ruary" , "ch" , "il" , "" , "e" , "y" , "ust" , "tember" , "ober" , "ember" , "ember" ]; |
150 | |
151 | let (mut s: &str, month0: u8) = short_month0(s)?; |
152 | |
153 | // tries to consume the suffix if possible |
154 | let suffix: &str = LONG_MONTH_SUFFIXES[month0 as usize]; |
155 | if s.len() >= suffix.len() && equals(&s.as_bytes()[..suffix.len()], pattern:suffix) { |
156 | s = &s[suffix.len()..]; |
157 | } |
158 | |
159 | Ok((s, month0)) |
160 | } |
161 | |
162 | /// Tries to parse the weekday with short or long weekday names. |
163 | /// It prefers long weekday names to short weekday names when both are possible. |
164 | pub(super) fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> { |
165 | // lowercased weekday names, minus first three chars |
166 | static LONG_WEEKDAY_SUFFIXES: [&str; 7] = |
167 | ["day" , "sday" , "nesday" , "rsday" , "day" , "urday" , "day" ]; |
168 | |
169 | let (mut s: &str, weekday: Weekday) = short_weekday(s)?; |
170 | |
171 | // tries to consume the suffix if possible |
172 | let suffix: &str = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize]; |
173 | if s.len() >= suffix.len() && equals(&s.as_bytes()[..suffix.len()], pattern:suffix) { |
174 | s = &s[suffix.len()..]; |
175 | } |
176 | |
177 | Ok((s, weekday)) |
178 | } |
179 | |
180 | /// Tries to consume exactly one given character. |
181 | pub(super) fn char(s: &str, c1: u8) -> ParseResult<&str> { |
182 | match s.as_bytes().first() { |
183 | Some(&c: u8) if c == c1 => Ok(&s[1..]), |
184 | Some(_) => Err(INVALID), |
185 | None => Err(TOO_SHORT), |
186 | } |
187 | } |
188 | |
189 | /// Tries to consume one or more whitespace. |
190 | pub(super) fn space(s: &str) -> ParseResult<&str> { |
191 | let s_: &str = s.trim_left(); |
192 | if s_.len() < s.len() { |
193 | Ok(s_) |
194 | } else if s.is_empty() { |
195 | Err(TOO_SHORT) |
196 | } else { |
197 | Err(INVALID) |
198 | } |
199 | } |
200 | |
201 | /// Consumes any number (including zero) of colon or spaces. |
202 | pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> { |
203 | Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace())) |
204 | } |
205 | |
206 | /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible. |
207 | /// |
208 | /// The additional `colon` may be used to parse a mandatory or optional `:` |
209 | /// between hours and minutes, and should return either a new suffix or `Err` when parsing fails. |
210 | pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)> |
211 | where |
212 | F: FnMut(&str) -> ParseResult<&str>, |
213 | { |
214 | timezone_offset_internal(s, consume_colon, allow_missing_minutes:false) |
215 | } |
216 | |
217 | fn timezone_offset_internal<F>( |
218 | mut s: &str, |
219 | mut consume_colon: F, |
220 | allow_missing_minutes: bool, |
221 | ) -> ParseResult<(&str, i32)> |
222 | where |
223 | F: FnMut(&str) -> ParseResult<&str>, |
224 | { |
225 | const fn digits(s: &str) -> ParseResult<(u8, u8)> { |
226 | let b = s.as_bytes(); |
227 | if b.len() < 2 { |
228 | Err(TOO_SHORT) |
229 | } else { |
230 | Ok((b[0], b[1])) |
231 | } |
232 | } |
233 | let negative = match s.as_bytes().first() { |
234 | Some(&b'+' ) => false, |
235 | Some(&b'-' ) => true, |
236 | Some(_) => return Err(INVALID), |
237 | None => return Err(TOO_SHORT), |
238 | }; |
239 | s = &s[1..]; |
240 | |
241 | // hours (00--99) |
242 | let hours = match digits(s)? { |
243 | (h1 @ b'0' ..=b'9' , h2 @ b'0' ..=b'9' ) => i32::from((h1 - b'0' ) * 10 + (h2 - b'0' )), |
244 | _ => return Err(INVALID), |
245 | }; |
246 | s = &s[2..]; |
247 | |
248 | // colons (and possibly other separators) |
249 | s = consume_colon(s)?; |
250 | |
251 | // minutes (00--59) |
252 | // if the next two items are digits then we have to add minutes |
253 | let minutes = if let Ok(ds) = digits(s) { |
254 | match ds { |
255 | (m1 @ b'0' ..=b'5' , m2 @ b'0' ..=b'9' ) => i32::from((m1 - b'0' ) * 10 + (m2 - b'0' )), |
256 | (b'6' ..=b'9' , b'0' ..=b'9' ) => return Err(OUT_OF_RANGE), |
257 | _ => return Err(INVALID), |
258 | } |
259 | } else if allow_missing_minutes { |
260 | 0 |
261 | } else { |
262 | return Err(TOO_SHORT); |
263 | }; |
264 | s = match s.len() { |
265 | len if len >= 2 => &s[2..], |
266 | len if len == 0 => s, |
267 | _ => return Err(TOO_SHORT), |
268 | }; |
269 | |
270 | let seconds = hours * 3600 + minutes * 60; |
271 | Ok((s, if negative { -seconds } else { seconds })) |
272 | } |
273 | |
274 | /// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as `+00:00`. |
275 | pub(super) fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)> |
276 | where |
277 | F: FnMut(&str) -> ParseResult<&str>, |
278 | { |
279 | let bytes: &[u8] = s.as_bytes(); |
280 | match bytes.first() { |
281 | Some(&b'z' ) | Some(&b'Z' ) => Ok((&s[1..], 0)), |
282 | Some(&b'u' ) | Some(&b'U' ) => { |
283 | if bytes.len() >= 3 { |
284 | let (b: u8, c: u8) = (bytes[1], bytes[2]); |
285 | match (b | 32, c | 32) { |
286 | (b't' , b'c' ) => Ok((&s[3..], 0)), |
287 | _ => Err(INVALID), |
288 | } |
289 | } else { |
290 | Err(INVALID) |
291 | } |
292 | } |
293 | _ => timezone_offset(s, consume_colon:colon), |
294 | } |
295 | } |
296 | |
297 | /// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as |
298 | /// `+00:00`, and allows missing minutes entirely. |
299 | pub(super) fn timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)> |
300 | where |
301 | F: FnMut(&str) -> ParseResult<&str>, |
302 | { |
303 | match s.as_bytes().first() { |
304 | Some(&b'z' ) | Some(&b'Z' ) => Ok((&s[1..], 0)), |
305 | _ => timezone_offset_internal(s, consume_colon:colon, allow_missing_minutes:true), |
306 | } |
307 | } |
308 | |
309 | /// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones. |
310 | /// May return `None` which indicates an insufficient offset data (i.e. `-0000`). |
311 | /// See [RFC 2822 Section 4.3]. |
312 | /// |
313 | /// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3 |
314 | pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> { |
315 | // tries to parse legacy time zone names |
316 | let upto = s.as_bytes().iter().position(|&c| !c.is_ascii_alphabetic()).unwrap_or(s.len()); |
317 | if upto > 0 { |
318 | let name = &s.as_bytes()[..upto]; |
319 | let s = &s[upto..]; |
320 | let offset_hours = |o| Ok((s, Some(o * 3600))); |
321 | if equals(name, "gmt" ) || equals(name, "ut" ) { |
322 | offset_hours(0) |
323 | } else if equals(name, "edt" ) { |
324 | offset_hours(-4) |
325 | } else if equals(name, "est" ) || equals(name, "cdt" ) { |
326 | offset_hours(-5) |
327 | } else if equals(name, "cst" ) || equals(name, "mdt" ) { |
328 | offset_hours(-6) |
329 | } else if equals(name, "mst" ) || equals(name, "pdt" ) { |
330 | offset_hours(-7) |
331 | } else if equals(name, "pst" ) { |
332 | offset_hours(-8) |
333 | } else if name.len() == 1 { |
334 | match name[0] { |
335 | // recommended by RFC 2822: consume but treat it as -0000 |
336 | b'a' ..=b'i' | b'k' ..=b'z' | b'A' ..=b'I' | b'K' ..=b'Z' => offset_hours(0), |
337 | _ => Ok((s, None)), |
338 | } |
339 | } else { |
340 | Ok((s, None)) |
341 | } |
342 | } else { |
343 | let (s_, offset) = timezone_offset(s, |s| Ok(s))?; |
344 | Ok((s_, Some(offset))) |
345 | } |
346 | } |
347 | |
348 | /// Tries to consume everything until next whitespace-like symbol. |
349 | /// Does not provide any offset information from the consumed data. |
350 | pub(super) fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> { |
351 | Ok((s.trim_left_matches(|c: char| !c.is_whitespace()), ())) |
352 | } |
353 | |
354 | /// Tries to consume an RFC2822 comment including preceding ` `. |
355 | /// |
356 | /// Returns the remaining string after the closing parenthesis. |
357 | pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> { |
358 | use CommentState::*; |
359 | |
360 | let s: &str = s.trim_start(); |
361 | |
362 | let mut state: CommentState = Start; |
363 | for (i: usize, c: u8) in s.bytes().enumerate() { |
364 | state = match (state, c) { |
365 | (Start, b'(' ) => Next(1), |
366 | (Next(1), b')' ) => return Ok((&s[i + 1..], ())), |
367 | (Next(depth: usize), b' \\' ) => Escape(depth), |
368 | (Next(depth: usize), b'(' ) => Next(depth + 1), |
369 | (Next(depth: usize), b')' ) => Next(depth - 1), |
370 | (Next(depth: usize), _) | (Escape(depth: usize), _) => Next(depth), |
371 | _ => return Err(INVALID), |
372 | }; |
373 | } |
374 | |
375 | Err(TOO_SHORT) |
376 | } |
377 | |
/// Scanner state used by `comment_2822` while walking over a comment.
enum CommentState {
    /// Nothing consumed yet; the opening parenthesis is expected next.
    Start,
    /// Inside a comment at the given nesting depth.
    Next(usize),
    /// Right after a backslash, at the given nesting depth.
    Escape(usize),
}
383 | |
#[cfg(test)]
#[test]
fn test_rfc2822_comments() {
    // each case pairs an input with the expected remainder (or error) after the comment
    let cases = [
        ("", Err(TOO_SHORT)),
        (" ", Err(TOO_SHORT)),
        ("x", Err(INVALID)),
        ("(", Err(TOO_SHORT)),
        ("()", Ok("")),
        (" \r\n\t()", Ok("")),
        ("() ", Ok(" ")),
        ("()z", Ok("z")),
        ("(x)", Ok("")),
        ("(())", Ok("")),
        ("((()))", Ok("")),
        ("(x(x(x)x)x)", Ok("")),
        ("( x ( x ( x ) x ) x )", Ok("")),
        (r"(\)", Err(TOO_SHORT)),
        (r"(\()", Ok("")),
        (r"(\))", Ok("")),
        (r"(\\)", Ok("")),
        ("(()())", Ok("")),
        ("( x ( x ) x ( x ) x )", Ok("")),
    ];

    for (input, want) in cases.iter() {
        // only the remaining input matters; the unit payload is dropped
        let got = comment_2822(input).map(|(rest, ())| rest);
        assert_eq!(
            *want, got,
            " {:?} expected to produce {:?}, but produced {:?}.",
            input, want, got
        );
    }
}
418 | |