1//! Functions to parse floating-point numbers.
2
3use crate::num::dec2flt::common::{is_8digits, ByteSlice};
4use crate::num::dec2flt::float::RawFloat;
5use crate::num::dec2flt::number::Number;
6
7const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
8
9/// Parse 8 digits, loaded as bytes in little-endian order.
10///
11/// This uses the trick where every digit is in [0x030, 0x39],
12/// and therefore can be parsed in 3 multiplications, much
13/// faster than the normal 8.
14///
15/// This is based off the algorithm described in "Fast numeric string to
16/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
17fn parse_8digits(mut v: u64) -> u64 {
18 const MASK: u64 = 0x0000_00FF_0000_00FF;
19 const MUL1: u64 = 0x000F_4240_0000_0064;
20 const MUL2: u64 = 0x0000_2710_0000_0001;
21 v -= 0x3030_3030_3030_3030;
22 v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
23 let v1: u64 = (v & MASK).wrapping_mul(MUL1);
24 let v2: u64 = ((v >> 16) & MASK).wrapping_mul(MUL2);
25 ((v1.wrapping_add(v2) >> 32) as u32) as u64
26}
27
28/// Parse digits until a non-digit character is found.
29fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) {
30 // may cause overflows, to be handled later
31
32 while s.len() >= 8 {
33 let num: u64 = s.read_u64();
34 if is_8digits(num) {
35 x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num));
36 s = &s[8..];
37 } else {
38 break;
39 }
40 }
41
42 s = s.parse_digits(|digit: u8| {
43 x = x.wrapping_mul(10).wrapping_add(digit as _);
44 });
45
46 (s, x)
47}
48
49/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
50fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) {
51 let mut s: &[u8] = *s_ref;
52
53 while *x < MIN_19DIGIT_INT {
54 // FIXME: Can't use s.split_first() here yet,
55 // see https://github.com/rust-lang/rust/issues/109328
56 if let [c: &u8, s_next: &[u8] @ ..] = s {
57 let digit: u8 = c.wrapping_sub(b'0');
58
59 if digit < 10 {
60 *x = (*x * 10) + digit as u64; // no overflows here
61 s = s_next;
62 } else {
63 break;
64 }
65 } else {
66 break;
67 }
68 }
69
70 *s_ref = s;
71}
72
73/// Parse the scientific notation component of a float.
74fn parse_scientific(s_ref: &mut &[u8]) -> Option<i64> {
75 let mut exponent = 0i64;
76 let mut negative = false;
77
78 let mut s = *s_ref;
79
80 if let Some((&c, s_next)) = s.split_first() {
81 negative = c == b'-';
82 if c == b'-' || c == b'+' {
83 s = s_next;
84 }
85 }
86
87 if matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
88 *s_ref = s.parse_digits(|digit| {
89 // no overflows here, saturate well before overflow
90 if exponent < 0x10000 {
91 exponent = 10 * exponent + digit as i64;
92 }
93 });
94 if negative { Some(-exponent) } else { Some(exponent) }
95 } else {
96 *s_ref = s;
97 None
98 }
99}
100
101/// Parse a partial, non-special floating point number.
102///
103/// This creates a representation of the float as the
104/// significant digits and the decimal exponent.
105fn parse_partial_number(mut s: &[u8]) -> Option<(Number, usize)> {
106 debug_assert!(!s.is_empty());
107
108 // parse initial digits before dot
109 let mut mantissa = 0_u64;
110 let start = s;
111 let tmp = try_parse_digits(s, mantissa);
112 s = tmp.0;
113 mantissa = tmp.1;
114 let mut n_digits = s.offset_from(start);
115
116 // handle dot with the following digits
117 let mut n_after_dot = 0;
118 let mut exponent = 0_i64;
119 let int_end = s;
120
121 if let Some((&b'.', s_next)) = s.split_first() {
122 s = s_next;
123 let before = s;
124 let tmp = try_parse_digits(s, mantissa);
125 s = tmp.0;
126 mantissa = tmp.1;
127 n_after_dot = s.offset_from(before);
128 exponent = -n_after_dot as i64;
129 }
130
131 n_digits += n_after_dot;
132 if n_digits == 0 {
133 return None;
134 }
135
136 // handle scientific format
137 let mut exp_number = 0_i64;
138 if let Some((&c, s_next)) = s.split_first() {
139 if c == b'e' || c == b'E' {
140 s = s_next;
141 // If None, we have no trailing digits after exponent, or an invalid float.
142 exp_number = parse_scientific(&mut s)?;
143 exponent += exp_number;
144 }
145 }
146
147 let len = s.offset_from(start) as _;
148
149 // handle uncommon case with many digits
150 if n_digits <= 19 {
151 return Some((Number { exponent, mantissa, negative: false, many_digits: false }, len));
152 }
153
154 n_digits -= 19;
155 let mut many_digits = false;
156 let mut p = start;
157 while let Some((&c, p_next)) = p.split_first() {
158 if c == b'.' || c == b'0' {
159 n_digits -= c.saturating_sub(b'0' - 1) as isize;
160 p = p_next;
161 } else {
162 break;
163 }
164 }
165 if n_digits > 0 {
166 // at this point we have more than 19 significant digits, let's try again
167 many_digits = true;
168 mantissa = 0;
169 let mut s = start;
170 try_parse_19digits(&mut s, &mut mantissa);
171 exponent = if mantissa >= MIN_19DIGIT_INT {
172 // big int
173 int_end.offset_from(s)
174 } else {
175 s = &s[1..];
176 let before = s;
177 try_parse_19digits(&mut s, &mut mantissa);
178 -s.offset_from(before)
179 } as i64;
180 // add back the explicit part
181 exponent += exp_number;
182 }
183
184 Some((Number { exponent, mantissa, negative: false, many_digits }, len))
185}
186
187/// Try to parse a non-special floating point number,
188/// as well as two slices with integer and fractional parts
189/// and the parsed exponent.
190pub fn parse_number(s: &[u8]) -> Option<Number> {
191 if let Some((float: Number, rest: usize)) = parse_partial_number(s) {
192 if rest == s.len() {
193 return Some(float);
194 }
195 }
196 None
197}
198
199/// Try to parse a special, non-finite float.
200pub(crate) fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
201 // Since a valid string has at most the length 8, we can load
202 // all relevant characters into a u64 and work from there.
203 // This also generates much better code.
204
205 let mut register;
206 let len: usize;
207
208 // All valid strings are either of length 8 or 3.
209 if s.len() == 8 {
210 register = s.read_u64();
211 len = 8;
212 } else if s.len() == 3 {
213 let a = s[0] as u64;
214 let b = s[1] as u64;
215 let c = s[2] as u64;
216 register = (c << 16) | (b << 8) | a;
217 len = 3;
218 } else {
219 return None;
220 }
221
222 // Clear out the bits which turn ASCII uppercase characters into
223 // lowercase characters. The resulting string is all uppercase.
224 // What happens to other characters is irrelevant.
225 register &= 0xDFDFDFDFDFDFDFDF;
226
227 // u64 values corresponding to relevant cases
228 const INF_3: u64 = 0x464E49; // "INF"
229 const INF_8: u64 = 0x5954494E49464E49; // "INFINITY"
230 const NAN: u64 = 0x4E414E; // "NAN"
231
232 // Match register value to constant to parse string.
233 // Also match on the string length to catch edge cases
234 // like "inf\0\0\0\0\0".
235 let float = match (register, len) {
236 (INF_3, 3) => F::INFINITY,
237 (INF_8, 8) => F::INFINITY,
238 (NAN, 3) => F::NAN,
239 _ => return None,
240 };
241
242 if negative { Some(-float) } else { Some(float) }
243}
244