scan.rs source code [crates/chrono/src/format/scan.rs]

1	// This is a part of Chrono.
2	// See README.md and LICENSE.txt for details.
3
/*!
 * Various scanning routines for the parser.
 */
7
8	#![allow(deprecated)]
9
10	use super::{ParseResult, INVALID, OUT_OF_RANGE, TOO_SHORT};
11	use crate::Weekday;
12
/// Returns `true` when `s` equals `pattern`, ignoring ASCII case in `s`.
///
/// Only the bytes of `s` are folded to lower case; `pattern` is compared verbatim, so it must
/// already be lower case for any letter to match.
fn equals(s: &[u8], pattern: &str) -> bool {
    // `Iterator::eq` compares element by element and also requires equal lengths, which
    // replaces the hand-written lockstep loop.
    s.iter().map(u8::to_ascii_lowercase).eq(pattern.bytes())
}
30
31	/// Tries to parse the non-negative number from `min` to `max` digits.
32	///
33	/// The absence of digits at all is an unconditional error.
34	/// More than `max` digits are consumed up to the first `max` digits.
35	/// Any number that does not fit in `i64` is an error.
36	#[inline]
37	pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
38	assert!(min <= max);
39
40	// We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
41	// the first non-numeric byte, which may be another ascii character or beginning of multi-byte
42	// UTF-8 character.
43	let bytes = s.as_bytes();
44	if bytes.len() < min {
45	return Err(TOO_SHORT);
46	}
47
48	let mut n = `0i64`;
49	for (i, c) in bytes.iter().take(max).cloned().enumerate() {
50	// cloned() = copied()
51	if !c.is_ascii_digit() {
52	if i < min {
53	return Err(INVALID);
54	} else {
55	return Ok((&s[i..], n));
56	}
57	}
58
59	n = match n.checked_mul(`10`).and_then(\|n\| n.checked_add((c - b'0') as i64)) {
60	Some(n) => n,
61	None => return Err(OUT_OF_RANGE),
62	};
63	}
64
65	Ok((&s[core::cmp::min(max, bytes.len())..], n))
66	}
67
68	/// Tries to consume at least one digits as a fractional second.
69	/// Returns the number of whole nanoseconds (0--999,999,999).
70	pub(super) fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
71	// record the number of digits consumed for later scaling.
72	let origlen: usize = s.len();
73	let (s: &str, v: i64) = number(s, min:`1`, max:`9`)?;
74	let consumed: usize = origlen - s.len();
75
76	// scale the number accordingly.
77	static SCALE: [i64; `10`] =
78	[`0`, `100_000_000`, `10_000_000`, `1_000_000`, `100_000`, `10_000`, `1_000`, `100`, `10`, `1`];
79	let v: i64 = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
80
81	// if there are more than 9 digits, skip next digits.
82	let s: &str = s.trim_left_matches(\|c: char\| c.is_ascii_digit());
83
84	Ok((s, v))
85	}
86
87	/// Tries to consume a fixed number of digits as a fractional second.
88	/// Returns the number of whole nanoseconds (0--999,999,999).
89	pub(super) fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
90	// record the number of digits consumed for later scaling.
91	let (s: &str, v: i64) = number(s, min:digits, max:digits)?;
92
93	// scale the number accordingly.
94	static SCALE: [i64; `10`] =
95	[`0`, `100_000_000`, `10_000_000`, `1_000_000`, `100_000`, `10_000`, `1_000`, `100`, `10`, `1`];
96	let v: i64 = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
97
98	Ok((s, v))
99	}
100
101	/// Tries to parse the month index (0 through 11) with the first three ASCII letters.
102	pub(super) fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
103	if s.len() < `3` {
104	return Err(TOO_SHORT);
105	}
106	let buf: &[u8] = s.as_bytes();
107	let month0: u8 = match (buf[`0`] \| `32`, buf[`1`] \| `32`, buf[`2`] \| `32`) {
108	(b'j', b'a', b'n') => `0`,
109	(b'f', b'e', b'b') => `1`,
110	(b'm', b'a', b'r') => `2`,
111	(b'a', b'p', b'r') => `3`,
112	(b'm', b'a', b'y') => `4`,
113	(b'j', b'u', b'n') => `5`,
114	(b'j', b'u', b'l') => `6`,
115	(b'a', b'u', b'g') => `7`,
116	(b's', b'e', b'p') => `8`,
117	(b'o', b'c', b't') => `9`,
118	(b'n', b'o', b'v') => `10`,
119	(b'd', b'e', b'c') => `11`,
120	_ => return Err(INVALID),
121	};
122	Ok((&s[`3`..], month0))
123	}
124
125	/// Tries to parse the weekday with the first three ASCII letters.
126	pub(super) fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
127	if s.len() < `3` {
128	return Err(TOO_SHORT);
129	}
130	let buf: &[u8] = s.as_bytes();
131	let weekday: Weekday = match (buf[`0`] \| `32`, buf[`1`] \| `32`, buf[`2`] \| `32`) {
132	(b'm', b'o', b'n') => Weekday::Mon,
133	(b't', b'u', b'e') => Weekday::Tue,
134	(b'w', b'e', b'd') => Weekday::Wed,
135	(b't', b'h', b'u') => Weekday::Thu,
136	(b'f', b'r', b'i') => Weekday::Fri,
137	(b's', b'a', b't') => Weekday::Sat,
138	(b's', b'u', b'n') => Weekday::Sun,
139	_ => return Err(INVALID),
140	};
141	Ok((&s[`3`..], weekday))
142	}
143
144	/// Tries to parse the month index (0 through 11) with short or long month names.
145	/// It prefers long month names to short month names when both are possible.
146	pub(super) fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
147	// lowercased month names, minus first three chars
148	static LONG_MONTH_SUFFIXES: [&str; `12`] =
149	["uary", "ruary", "ch", "il", "", "e", "y", "ust", "tember", "ober", "ember", "ember"];
150
151	let (mut s: &str, month0: u8) = short_month0(s)?;
152
153	// tries to consume the suffix if possible
154	let suffix: &str = LONG_MONTH_SUFFIXES[month0 as usize];
155	if s.len() >= suffix.len() && equals(&s.as_bytes()[..suffix.len()], pattern:suffix) {
156	s = &s[suffix.len()..];
157	}
158
159	Ok((s, month0))
160	}
161
162	/// Tries to parse the weekday with short or long weekday names.
163	/// It prefers long weekday names to short weekday names when both are possible.
164	pub(super) fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
165	// lowercased weekday names, minus first three chars
166	static LONG_WEEKDAY_SUFFIXES: [&str; `7`] =
167	["day", "sday", "nesday", "rsday", "day", "urday", "day"];
168
169	let (mut s: &str, weekday: Weekday) = short_weekday(s)?;
170
171	// tries to consume the suffix if possible
172	let suffix: &str = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
173	if s.len() >= suffix.len() && equals(&s.as_bytes()[..suffix.len()], pattern:suffix) {
174	s = &s[suffix.len()..];
175	}
176
177	Ok((s, weekday))
178	}
179
180	/// Tries to consume exactly one given character.
181	pub(super) fn char(s: &str, c1: u8) -> ParseResult<&str> {
182	match s.as_bytes().first() {
183	Some(&c: u8) if c == c1 => Ok(&s[`1`..]),
184	Some(_) => Err(INVALID),
185	None => Err(TOO_SHORT),
186	}
187	}
188
189	/// Tries to consume one or more whitespace.
190	pub(super) fn space(s: &str) -> ParseResult<&str> {
191	let s_: &str = s.trim_left();
192	if s_.len() < s.len() {
193	Ok(s_)
194	} else if s.is_empty() {
195	Err(TOO_SHORT)
196	} else {
197	Err(INVALID)
198	}
199	}
200
201	/// Consumes any number (including zero) of colon or spaces.
202	pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> {
203	Ok(s.trim_left_matches(\|c: char\| c == ':' \|\| c.is_whitespace()))
204	}
205
206	/// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible.
207	///
208	/// The additional `colon` may be used to parse a mandatory or optional `:`
209	/// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
210	pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)>
211	where
212	F: FnMut(&str) -> ParseResult<&str>,
213	{
214	timezone_offset_internal(s, consume_colon, allow_missing_minutes:`false`)
215	}
216
217	fn timezone_offset_internal<F>(
218	mut s: &str,
219	mut consume_colon: F,
220	allow_missing_minutes: bool,
221	) -> ParseResult<(&str, i32)>
222	where
223	F: FnMut(&str) -> ParseResult<&str>,
224	{
225	const fn digits(s: &str) -> ParseResult<(u8, u8)> {
226	let b = s.as_bytes();
227	if b.len() < `2` {
228	Err(TOO_SHORT)
229	} else {
230	Ok((b[`0`], b[`1`]))
231	}
232	}
233	let negative = match s.as_bytes().first() {
234	Some(&b'+') => `false`,
235	Some(&b'-') => `true`,
236	Some(_) => return Err(INVALID),
237	None => return Err(TOO_SHORT),
238	};
239	s = &s[`1`..];
240
241	// hours (00--99)
242	let hours = match digits(s)? {
243	(h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * `10` + (h2 - b'0')),
244	_ => return Err(INVALID),
245	};
246	s = &s[`2`..];
247
248	// colons (and possibly other separators)
249	s = consume_colon(s)?;
250
251	// minutes (00--59)
252	// if the next two items are digits then we have to add minutes
253	let minutes = if let Ok(ds) = digits(s) {
254	match ds {
255	(m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * `10` + (m2 - b'0')),
256	(b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE),
257	_ => return Err(INVALID),
258	}
259	} else if allow_missing_minutes {
260	`0`
261	} else {
262	return Err(TOO_SHORT);
263	};
264	s = match s.len() {
265	len if len >= `2` => &s[`2`..],
266	len if len == `0` => s,
267	_ => return Err(TOO_SHORT),
268	};
269
270	let seconds = hours * `3600` + minutes * `60`;
271	Ok((s, if negative { -seconds } else { seconds }))
272	}
273
274	/// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as `+00:00`.
275	pub(super) fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
276	where
277	F: FnMut(&str) -> ParseResult<&str>,
278	{
279	let bytes: &[u8] = s.as_bytes();
280	match bytes.first() {
281	Some(&b'z') \| Some(&b'Z') => Ok((&s[`1`..], `0`)),
282	Some(&b'u') \| Some(&b'U') => {
283	if bytes.len() >= `3` {
284	let (b: u8, c: u8) = (bytes[`1`], bytes[`2`]);
285	match (b \| `32`, c \| `32`) {
286	(b't', b'c') => Ok((&s[`3`..], `0`)),
287	_ => Err(INVALID),
288	}
289	} else {
290	Err(INVALID)
291	}
292	}
293	_ => timezone_offset(s, consume_colon:colon),
294	}
295	}
296
297	/// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as
298	/// `+00:00`, and allows missing minutes entirely.
299	pub(super) fn timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
300	where
301	F: FnMut(&str) -> ParseResult<&str>,
302	{
303	match s.as_bytes().first() {
304	Some(&b'z') \| Some(&b'Z') => Ok((&s[`1`..], `0`)),
305	_ => timezone_offset_internal(s, consume_colon:colon, allow_missing_minutes:`true`),
306	}
307	}
308
309	/// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
310	/// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
311	/// See [RFC 2822 Section 4.3].
312	///
313	/// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3
314	pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> {
315	// tries to parse legacy time zone names
316	let upto = s.as_bytes().iter().position(\|&c\| !c.is_ascii_alphabetic()).unwrap_or(s.len());
317	if upto > `0` {
318	let name = &s.as_bytes()[..upto];
319	let s = &s[upto..];
320	let offset_hours = \|o\| Ok((s, Some(o * `3600`)));
321	if equals(name, "gmt") \|\| equals(name, "ut") {
322	offset_hours(`0`)
323	} else if equals(name, "edt") {
324	offset_hours(`-4`)
325	} else if equals(name, "est") \|\| equals(name, "cdt") {
326	offset_hours(`-5`)
327	} else if equals(name, "cst") \|\| equals(name, "mdt") {
328	offset_hours(`-6`)
329	} else if equals(name, "mst") \|\| equals(name, "pdt") {
330	offset_hours(`-7`)
331	} else if equals(name, "pst") {
332	offset_hours(`-8`)
333	} else if name.len() == `1` {
334	match name[`0`] {
335	// recommended by RFC 2822: consume but treat it as -0000
336	b'a'..=b'i' \| b'k'..=b'z' \| b'A'..=b'I' \| b'K'..=b'Z' => offset_hours(`0`),
337	_ => Ok((s, None)),
338	}
339	} else {
340	Ok((s, None))
341	}
342	} else {
343	let (s_, offset) = timezone_offset(s, \|s\| Ok(s))?;
344	Ok((s_, Some(offset)))
345	}
346	}
347
348	/// Tries to consume everything until next whitespace-like symbol.
349	/// Does not provide any offset information from the consumed data.
350	pub(super) fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> {
351	Ok((s.trim_left_matches(\|c: char\| !c.is_whitespace()), ()))
352	}
353
354	/// Tries to consume an RFC2822 comment including preceding ` `.
355	///
356	/// Returns the remaining string after the closing parenthesis.
357	pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> {
358	use CommentState::*;
359
360	let s: &str = s.trim_start();
361
362	let mut state: CommentState = Start;
363	for (i: usize, c: u8) in s.bytes().enumerate() {
364	state = match (state, c) {
365	(Start, b'(') => Next(`1`),
366	(Next(`1`), b')') => return Ok((&s[i + `1`..], ())),
367	(Next(depth: usize), b'`\\`') => Escape(depth),
368	(Next(depth: usize), b'(') => Next(depth + `1`),
369	(Next(depth: usize), b')') => Next(depth - `1`),
370	(Next(depth: usize), _) \| (Escape(depth: usize), _) => Next(depth),
371	_ => return Err(INVALID),
372	};
373	}
374
375	Err(TOO_SHORT)
376	}
377
378	enum CommentState {
379	Start,
380	Next(usize),
381	Escape(usize),
382	}
383
/// Checks `comment_2822` against empty, unterminated, nested and escaped comments.
#[cfg(test)]
#[test]
fn test_rfc2822_comments() {
    // (input, expected remainder or error)
    let testdata = [
        ("", Err(TOO_SHORT)),
        (" ", Err(TOO_SHORT)),
        ("x", Err(INVALID)),
        ("(", Err(TOO_SHORT)),
        ("()", Ok("")),
        (" \r\n\t()", Ok("")),
        ("() ", Ok(" ")),
        ("()z", Ok("z")),
        ("(x)", Ok("")),
        ("(())", Ok("")),
        ("((()))", Ok("")),
        ("(x(x(x)x)x)", Ok("")),
        ("( x ( x ( x ) x ) x )", Ok("")),
        (r"(\)", Err(TOO_SHORT)),
        (r"(\()", Ok("")),
        (r"(\))", Ok("")),
        (r"(\\)", Ok("")),
        ("(()())", Ok("")),
        ("( x ( x ) x ( x ) x )", Ok("")),
    ];

    for (test_in, expected) in &testdata {
        let actual = match comment_2822(test_in) {
            Ok((rest, ())) => Ok(rest),
            Err(e) => Err(e),
        };
        assert_eq!(
            *expected, actual,
            "{:?} expected to produce {:?}, but produced {:?}.",
            test_in, expected, actual
        );
    }
}
418