parse.rs source code [crates/jiff/src/util/parse.rs]

1	use crate::{
2	error::{err, Error},
3	util::escape::{Byte, Bytes},
4	};
5
6	/// Parses an `i64` number from the beginning to the end of the given slice of
7	/// ASCII digit characters.
8	///
9	/// If any byte in the given slice is not `[0-9]`, then this returns an error.
10	/// Similarly, if the number parsed does not fit into a `i64`, then this
11	/// returns an error. Notably, this routine does not permit parsing a negative
12	/// integer. (We use `i64` because everything in this crate uses signed
13	/// integers, and because a higher level routine might want to parse the sign
14	/// and then apply it to the result of this routine.)
15	#[cfg_attr(feature = "perf-inline", inline(always))]
16	pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
17	if bytes.is_empty() {
18	return Err(err!("invalid number, no digits found"));
19	}
20	let mut n: i64 = `0`;
21	for &byte in bytes {
22	let digit = match byte.checked_sub(b'0') {
23	None => {
24	return Err(err!(
25	"invalid digit, expected 0-9 but got {}",
26	Byte(byte),
27	));
28	}
29	Some(digit) if digit > `9` => {
30	return Err(err!(
31	"invalid digit, expected 0-9 but got {}",
32	Byte(byte),
33	))
34	}
35	Some(digit) => {
36	debug_assert!((`0`..=`9`).contains(&digit));
37	i64::from(digit)
38	}
39	};
40	n = n.checked_mul(`10`).and_then(\|n\| n.checked_add(digit)).ok_or_else(
41	\|\| {
42	err!(
43	"number '{}' too big to parse into 64-bit integer",
44	Bytes(bytes),
45	)
46	},
47	)?;
48	}
49	Ok(n)
50	}
51
52	/// Parses an `i64` fractional number from the beginning to the end of the
53	/// given slice of ASCII digit characters.
54	///
55	/// The fraction's maximum precision must be provided. The returned integer
56	/// will always be in units of `10^{max_precision}`. For example, to parse a
57	/// fractional amount of seconds with a maximum precision of nanoseconds, then
58	/// use `max_precision=9`.
59	///
60	/// If any byte in the given slice is not `[0-9]`, then this returns an error.
61	/// Similarly, if the fraction parsed does not fit into a `i64`, then this
62	/// returns an error. Notably, this routine does not permit parsing a negative
63	/// integer. (We use `i64` because everything in this crate uses signed
64	/// integers, and because a higher level routine might want to parse the sign
65	/// and then apply it to the result of this routine.)
66	pub(crate) fn fraction(
67	bytes: &[u8],
68	max_precision: usize,
69	) -> Result<i64, Error> {
70	if bytes.is_empty() {
71	return Err(err!("invalid fraction, no digits found"));
72	} else if bytes.len() > max_precision {
73	return Err(err!(
74	"invalid fraction, too many digits \
75	(at most {max_precision} are allowed"
76	));
77	}
78	let mut n: i64 = `0`;
79	for &byte in bytes {
80	let digit = match byte.checked_sub(b'0') {
81	None => {
82	return Err(err!(
83	"invalid fractional digit, expected 0-9 but got {}",
84	Byte(byte),
85	));
86	}
87	Some(digit) if digit > `9` => {
88	return Err(err!(
89	"invalid fractional digit, expected 0-9 but got {}",
90	Byte(byte),
91	))
92	}
93	Some(digit) => {
94	debug_assert!((`0`..=`9`).contains(&digit));
95	i64::from(digit)
96	}
97	};
98	n = n.checked_mul(`10`).and_then(\|n\| n.checked_add(digit)).ok_or_else(
99	\|\| {
100	err!(
101	"fractional '{}' too big to parse into 64-bit integer",
102	Bytes(bytes),
103	)
104	},
105	)?;
106	}
107	for _ in bytes.len()..max_precision {
108	n = n.checked_mul(`10`).ok_or_else(\|\| {
109	err!(
110	"fractional '{}' too big to parse into 64-bit integer \
111	(too much precision supported)",
112	Bytes(bytes)
113	)
114	})?;
115	}
116	Ok(n)
117	}
118
/// Converts an `OsStr` into a `&str` when `&[u8]` isn't easily available.
///
/// This behaves like `OsStr::to_str`, except that failure is reported as an
/// error with a slightly better message instead of as `None`.
///
/// # Errors
///
/// Returns an error if the given value is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(utf8) => Ok(utf8),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}
133
/// Converts an `OsStr` into a `&[u8]`.
///
/// The main difference between this and `OsStr::to_str` is that on Unix
/// platforms this is a zero-cost conversion that never fails. On all other
/// platforms, this does UTF-8 validation and returns an error if the value
/// is not valid UTF-8.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // It is suspect that we're doing UTF-8 validation and then throwing
        // away the fact that we did UTF-8 validation. So this could lead
        // to an extra UTF-8 check if the caller ultimately needs UTF-8. If
        // that's important, we can add a new API that returns a `&str`. But it
        // probably won't matter because an `OsStr` in this crate is usually
        // just an environment variable.
        match os_str.to_str() {
            Some(utf8) => Ok(utf8.as_bytes()),
            None => {
                Err(err!("environment value {os_str:?} is not valid UTF-8"))
            }
        }
    }
}
164
/// Divides `input` into the pair `(input[..at], input[at..])`.
///
/// Returns `None` when `at` is greater than the length of `input`. A
/// position equal to the length is accepted and yields an empty second
/// slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // `split_at` panics on an out-of-bounds position, so it is only called
    // once the bound has been checked.
    (at <= input.len()).then(|| input.split_at(at))
}
177
/// Returns a function that converts two slices to an offset.
///
/// It takes the starting point as input and returns a function that, when
/// given an ending point (greater than or equal to the starting point),
/// subtracts the address of `start` from the address of the ending point and
/// returns the result, i.e., an offset relative to the starting point.
///
/// This is useful as a helper function in parsing routines that use slices
/// but want to report offsets.
///
/// # Panics
///
/// The returned function panics if the ending point begins at a lower
/// address than `start`. No other validation is performed: callers are
/// expected to only pass suffix slices of `start`, since the offset is
/// otherwise meaningless.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    move |end: &'a [u8]| {
        let base = start.as_ptr() as usize;
        let tip = end.as_ptr() as usize;
        // A plain `-` only traps on underflow when overflow checks are
        // enabled. Checking explicitly guarantees a panic, instead of a
        // silently wrapped (and enormous) offset, in every build profile.
        tip.checked_sub(base)
            .expect("ending slice must not begin before the starting slice")
    }
}
196
197	/// Returns a function that converts two slices to the slice between them.
198	///
199	/// This takes a starting point as input and returns a function that, when
200	/// given an ending point (greater than or equal to the starting point), it
201	/// returns a slice beginning at the starting point and ending just at the
202	/// ending point.
203	///
204	/// This is useful as a helper function in parsing routines.
205	///
206	/// # Panics
207	///
208	/// This may panic if the ending point is not a suffix slice of `start`.
209	pub(crate) fn slicer<'a>(
210	start: &'a [u8],
211	) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
212	let mkoffset: impl Fn(&[u8]) -> usize = offseter(start);
213	move \|end: &'a [u8]\| {
214	let offset: usize = mkoffset(end);
215	&start[..offset]
216	}
217	}
218