parse.rs source code [crates/core/src/num/dec2flt/parse.rs]

1	//! Functions to parse floating-point numbers.
2
3	use crate::num::dec2flt::common::{is_8digits, ByteSlice};
4	use crate::num::dec2flt::float::RawFloat;
5	use crate::num::dec2flt::number::Number;
6
7	const MIN_19DIGIT_INT: u64 = `100_0000_0000_0000_0000`;
8
9	/// Parse 8 digits, loaded as bytes in little-endian order.
10	///
11	/// This uses the trick where every digit is in [0x030, 0x39],
12	/// and therefore can be parsed in 3 multiplications, much
13	/// faster than the normal 8.
14	///
15	/// This is based off the algorithm described in "Fast numeric string to
16	/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
17	fn parse_8digits(mut v: u64) -> u64 {
18	const MASK: u64 = `0x0000_00FF_0000_00FF`;
19	const MUL1: u64 = `0x000F_4240_0000_0064`;
20	const MUL2: u64 = `0x0000_2710_0000_0001`;
21	v -= `0x3030_3030_3030_3030`;
22	v = (v * `10`) + (v >> `8`); // will not overflow, fits in 63 bits
23	let v1: u64 = (v & MASK).wrapping_mul(MUL1);
24	let v2: u64 = ((v >> `16`) & MASK).wrapping_mul(MUL2);
25	((v1.wrapping_add(v2) >> `32`) as u32) as u64
26	}
27
28	/// Parse digits until a non-digit character is found.
29	fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) {
30	// may cause overflows, to be handled later
31
32	while s.len() >= `8` {
33	let num: u64 = s.read_u64();
34	if is_8digits(num) {
35	x = x.wrapping_mul(`1_0000_0000`).wrapping_add(parse_8digits(num));
36	s = &s[`8`..];
37	} else {
38	break;
39	}
40	}
41
42	s = s.parse_digits(\|digit: u8\| {
43	x = x.wrapping_mul(`10`).wrapping_add(digit as _);
44	});
45
46	(s, x)
47	}
48
49	/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
50	fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) {
51	let mut s: &[u8] = *s_ref;
52
53	while *x < MIN_19DIGIT_INT {
54	// FIXME: Can't use s.split_first() here yet,
55	// see https://github.com/rust-lang/rust/issues/109328
56	if let [c: &u8, s_next: &[u8] @ ..] = s {
57	let digit: u8 = c.wrapping_sub(b'0');
58
59	if digit < `10` {
60	x = (x * `10`) + digit as u64; // no overflows here
61	s = s_next;
62	} else {
63	break;
64	}
65	} else {
66	break;
67	}
68	}
69
70	*s_ref = s;
71	}
72
73	/// Parse the scientific notation component of a float.
74	fn parse_scientific(s_ref: &mut &[u8]) -> Option<i64> {
75	let mut exponent = `0i64`;
76	let mut negative = `false`;
77
78	let mut s = *s_ref;
79
80	if let Some((&c, s_next)) = s.split_first() {
81	negative = c == b'-';
82	if c == b'-' \|\| c == b'+' {
83	s = s_next;
84	}
85	}
86
87	if matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
88	*s_ref = s.parse_digits(\|digit\| {
89	// no overflows here, saturate well before overflow
90	if exponent < `0x10000` {
91	exponent = `10` * exponent + digit as i64;
92	}
93	});
94	if negative { Some(-exponent) } else { Some(exponent) }
95	} else {
96	*s_ref = s;
97	None
98	}
99	}
100
101	/// Parse a partial, non-special floating point number.
102	///
103	/// This creates a representation of the float as the
104	/// significant digits and the decimal exponent.
105	fn parse_partial_number(mut s: &[u8]) -> Option<(Number, usize)> {
106	debug_assert!(!s.is_empty());
107
108	// parse initial digits before dot
109	let mut mantissa = `0_u64`;
110	let start = s;
111	let tmp = try_parse_digits(s, mantissa);
112	s = tmp.0;
113	mantissa = tmp.1;
114	let mut n_digits = s.offset_from(start);
115
116	// handle dot with the following digits
117	let mut n_after_dot = `0`;
118	let mut exponent = `0_i64`;
119	let int_end = s;
120
121	if let Some((&b'.', s_next)) = s.split_first() {
122	s = s_next;
123	let before = s;
124	let tmp = try_parse_digits(s, mantissa);
125	s = tmp.0;
126	mantissa = tmp.1;
127	n_after_dot = s.offset_from(before);
128	exponent = -n_after_dot as i64;
129	}
130
131	n_digits += n_after_dot;
132	if n_digits == `0` {
133	return None;
134	}
135
136	// handle scientific format
137	let mut exp_number = `0_i64`;
138	if let Some((&c, s_next)) = s.split_first() {
139	if c == b'e' \|\| c == b'E' {
140	s = s_next;
141	// If None, we have no trailing digits after exponent, or an invalid float.
142	exp_number = parse_scientific(&mut s)?;
143	exponent += exp_number;
144	}
145	}
146
147	let len = s.offset_from(start) as _;
148
149	// handle uncommon case with many digits
150	if n_digits <= `19` {
151	return Some((Number { exponent, mantissa, negative: `false`, many_digits: `false` }, len));
152	}
153
154	n_digits -= `19`;
155	let mut many_digits = `false`;
156	let mut p = start;
157	while let Some((&c, p_next)) = p.split_first() {
158	if c == b'.' \|\| c == b'0' {
159	n_digits -= c.saturating_sub(b'0' - `1`) as isize;
160	p = p_next;
161	} else {
162	break;
163	}
164	}
165	if n_digits > `0` {
166	// at this point we have more than 19 significant digits, let's try again
167	many_digits = `true`;
168	mantissa = `0`;
169	let mut s = start;
170	try_parse_19digits(&mut s, &mut mantissa);
171	exponent = if mantissa >= MIN_19DIGIT_INT {
172	// big int
173	int_end.offset_from(s)
174	} else {
175	s = &s[`1`..];
176	let before = s;
177	try_parse_19digits(&mut s, &mut mantissa);
178	-s.offset_from(before)
179	} as i64;
180	// add back the explicit part
181	exponent += exp_number;
182	}
183
184	Some((Number { exponent, mantissa, negative: `false`, many_digits }, len))
185	}
186
187	/// Try to parse a non-special floating point number,
188	/// as well as two slices with integer and fractional parts
189	/// and the parsed exponent.
190	pub fn parse_number(s: &[u8]) -> Option<Number> {
191	if let Some((float: Number, rest: usize)) = parse_partial_number(s) {
192	if rest == s.len() {
193	return Some(float);
194	}
195	}
196	None
197	}
198
199	/// Try to parse a special, non-finite float.
200	pub(crate) fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
201	// Since a valid string has at most the length 8, we can load
202	// all relevant characters into a u64 and work from there.
203	// This also generates much better code.
204
205	let mut register;
206	let len: usize;
207
208	// All valid strings are either of length 8 or 3.
209	if s.len() == `8` {
210	register = s.read_u64();
211	len = `8`;
212	} else if s.len() == `3` {
213	let a = s[`0`] as u64;
214	let b = s[`1`] as u64;
215	let c = s[`2`] as u64;
216	register = (c << `16`) \| (b << `8`) \| a;
217	len = `3`;
218	} else {
219	return None;
220	}
221
222	// Clear out the bits which turn ASCII uppercase characters into
223	// lowercase characters. The resulting string is all uppercase.
224	// What happens to other characters is irrelevant.
225	register &= `0xDFDFDFDFDFDFDFDF`;
226
227	// u64 values corresponding to relevant cases
228	const INF_3: u64 = `0x464E49`; // "INF"
229	const INF_8: u64 = `0x5954494E49464E49`; // "INFINITY"
230	const NAN: u64 = `0x4E414E`; // "NAN"
231
232	// Match register value to constant to parse string.
233	// Also match on the string length to catch edge cases
234	// like "inf\0\0\0\0\0".
235	let float = match (register, len) {
236	(INF_3, `3`) => F::INFINITY,
237	(INF_8, `8`) => F::INFINITY,
238	(NAN, `3`) => F::NAN,
239	_ => return None,
240	};
241
242	if negative { Some(-float) } else { Some(float) }
243	}
244

Provided by KDAB

Definitions

Learn Rust with the experts

Find out more