1//! Arbitrary-precision decimal type used by fallback algorithms.
2//!
3//! This is only used if the fast-path (native floats) and
4//! the Eisel-Lemire algorithm are unable to unambiguously
5//! determine the float.
6//!
7//! The technique used is "Simple Decimal Conversion", developed
8//! by Nigel Tao and Ken Thompson. A detailed description of the
9//! algorithm can be found in "ParseNumberF64 by Simple Decimal Conversion",
10//! available online: <https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html>.
11
12use crate::num::dec2flt::common::{ByteSlice, is_8digits};
13
14/// A decimal floating-point number, represented as a sequence of decimal digits.
15#[derive(Clone, Debug, PartialEq)]
16pub struct DecimalSeq {
17 /// The number of significant digits in the decimal.
18 pub num_digits: usize,
19 /// The offset of the decimal point in the significant digits.
20 pub decimal_point: i32,
21 /// If the number of significant digits stored in the decimal is truncated.
22 pub truncated: bool,
23 /// Buffer of the raw digits, in the range [0, 9].
24 pub digits: [u8; Self::MAX_DIGITS],
25}
26
27impl Default for DecimalSeq {
28 fn default() -> Self {
29 Self { num_digits: 0, decimal_point: 0, truncated: false, digits: [0; Self::MAX_DIGITS] }
30 }
31}
32
33impl DecimalSeq {
34 /// The maximum number of digits required to unambiguously round up to a 64-bit float.
35 ///
36 /// For an IEEE 754 binary64 float, this required 767 digits. So we store the max digits + 1.
37 ///
38 /// We can exactly represent a float in radix `b` from radix 2 if
39 /// `b` is divisible by 2. This function calculates the exact number of
40 /// digits required to exactly represent that float.
41 ///
42 /// According to the "Handbook of Floating Point Arithmetic",
43 /// for IEEE754, with `emin` being the min exponent, `p2` being the
44 /// precision, and `b` being the radix, the number of digits follows as:
45 ///
46 /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
47 ///
48 /// For f32, this follows as:
49 /// emin = -126
50 /// p2 = 24
51 ///
52 /// For f64, this follows as:
53 /// emin = -1022
54 /// p2 = 53
55 ///
56 /// In Python:
57 /// `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
58 pub const MAX_DIGITS: usize = 768;
59
60 /// The max decimal digits that can be exactly represented in a 64-bit integer.
61 pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
62 pub(super) const DECIMAL_POINT_RANGE: i32 = 2047;
63
64 /// Append a digit to the buffer if it fits.
65 // FIXME(tgross35): it may be better for this to return an option
66 // FIXME(tgross35): incrementing the digit counter even if we don't push anything
67 // seems incorrect.
68 pub(super) fn try_add_digit(&mut self, digit: u8) {
69 if self.num_digits < Self::MAX_DIGITS {
70 self.digits[self.num_digits] = digit;
71 }
72 self.num_digits += 1;
73 }
74
75 /// Trim trailing zeros from the buffer.
76 // FIXME(tgross35): this could be `.rev().position()` if perf is okay
77 pub fn trim(&mut self) {
78 // All of the following calls to `DecimalSeq::trim` can't panic because:
79 //
80 // 1. `parse_decimal` sets `num_digits` to a max of `DecimalSeq::MAX_DIGITS`.
81 // 2. `right_shift` sets `num_digits` to `write_index`, which is bounded by `num_digits`.
82 // 3. `left_shift` `num_digits` to a max of `DecimalSeq::MAX_DIGITS`.
83 //
84 // Trim is only called in `right_shift` and `left_shift`.
85 debug_assert!(self.num_digits <= Self::MAX_DIGITS);
86 while self.num_digits != 0 && self.digits[self.num_digits - 1] == 0 {
87 self.num_digits -= 1;
88 }
89 }
90
91 pub(super) fn round(&self) -> u64 {
92 if self.num_digits == 0 || self.decimal_point < 0 {
93 return 0;
94 } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 {
95 return 0xFFFF_FFFF_FFFF_FFFF_u64;
96 }
97
98 let dp = self.decimal_point as usize;
99 let mut n = 0_u64;
100
101 for i in 0..dp {
102 n *= 10;
103 if i < self.num_digits {
104 n += self.digits[i] as u64;
105 }
106 }
107
108 let mut round_up = false;
109
110 if dp < self.num_digits {
111 round_up = self.digits[dp] >= 5;
112 if self.digits[dp] == 5 && dp + 1 == self.num_digits {
113 round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0))
114 }
115 }
116
117 if round_up {
118 n += 1;
119 }
120 n
121 }
122
123 /// Computes decimal * 2^shift.
124 pub(super) fn left_shift(&mut self, shift: usize) {
125 if self.num_digits == 0 {
126 return;
127 }
128 let num_new_digits = number_of_digits_decimal_left_shift(self, shift);
129 let mut read_index = self.num_digits;
130 let mut write_index = self.num_digits + num_new_digits;
131 let mut n = 0_u64;
132
133 while read_index != 0 {
134 read_index -= 1;
135 write_index -= 1;
136 n += (self.digits[read_index] as u64) << shift;
137 let quotient = n / 10;
138 let remainder = n - (10 * quotient);
139 if write_index < Self::MAX_DIGITS {
140 self.digits[write_index] = remainder as u8;
141 } else if remainder > 0 {
142 self.truncated = true;
143 }
144 n = quotient;
145 }
146
147 while n > 0 {
148 write_index -= 1;
149 let quotient = n / 10;
150 let remainder = n - (10 * quotient);
151 if write_index < Self::MAX_DIGITS {
152 self.digits[write_index] = remainder as u8;
153 } else if remainder > 0 {
154 self.truncated = true;
155 }
156 n = quotient;
157 }
158
159 self.num_digits += num_new_digits;
160
161 if self.num_digits > Self::MAX_DIGITS {
162 self.num_digits = Self::MAX_DIGITS;
163 }
164
165 self.decimal_point += num_new_digits as i32;
166 self.trim();
167 }
168
169 /// Computes decimal * 2^-shift.
170 pub(super) fn right_shift(&mut self, shift: usize) {
171 let mut read_index = 0;
172 let mut write_index = 0;
173 let mut n = 0_u64;
174 while (n >> shift) == 0 {
175 if read_index < self.num_digits {
176 n = (10 * n) + self.digits[read_index] as u64;
177 read_index += 1;
178 } else if n == 0 {
179 return;
180 } else {
181 while (n >> shift) == 0 {
182 n *= 10;
183 read_index += 1;
184 }
185 break;
186 }
187 }
188 self.decimal_point -= read_index as i32 - 1;
189 if self.decimal_point < -Self::DECIMAL_POINT_RANGE {
190 // `self = Self::Default()`, but without the overhead of clearing `digits`.
191 self.num_digits = 0;
192 self.decimal_point = 0;
193 self.truncated = false;
194 return;
195 }
196 let mask = (1_u64 << shift) - 1;
197 while read_index < self.num_digits {
198 let new_digit = (n >> shift) as u8;
199 n = (10 * (n & mask)) + self.digits[read_index] as u64;
200 read_index += 1;
201 self.digits[write_index] = new_digit;
202 write_index += 1;
203 }
204 while n > 0 {
205 let new_digit = (n >> shift) as u8;
206 n = 10 * (n & mask);
207 if write_index < Self::MAX_DIGITS {
208 self.digits[write_index] = new_digit;
209 write_index += 1;
210 } else if new_digit > 0 {
211 self.truncated = true;
212 }
213 }
214 self.num_digits = write_index;
215 self.trim();
216 }
217}
218
219/// Parse a big integer representation of the float as a decimal.
220pub fn parse_decimal_seq(mut s: &[u8]) -> DecimalSeq {
221 let mut d = DecimalSeq::default();
222 let start = s;
223
224 while let Some((&b'0', s_next)) = s.split_first() {
225 s = s_next;
226 }
227
228 s = s.parse_digits(|digit| d.try_add_digit(digit));
229
230 if let Some((b'.', s_next)) = s.split_first() {
231 s = s_next;
232 let first = s;
233 // Skip leading zeros.
234 if d.num_digits == 0 {
235 while let Some((&b'0', s_next)) = s.split_first() {
236 s = s_next;
237 }
238 }
239 while s.len() >= 8 && d.num_digits + 8 < DecimalSeq::MAX_DIGITS {
240 let v = s.read_u64();
241 if !is_8digits(v) {
242 break;
243 }
244 d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
245 d.num_digits += 8;
246 s = &s[8..];
247 }
248 s = s.parse_digits(|digit| d.try_add_digit(digit));
249 d.decimal_point = s.len() as i32 - first.len() as i32;
250 }
251
252 if d.num_digits != 0 {
253 // Ignore the trailing zeros if there are any
254 let mut n_trailing_zeros = 0;
255 for &c in start[..(start.len() - s.len())].iter().rev() {
256 if c == b'0' {
257 n_trailing_zeros += 1;
258 } else if c != b'.' {
259 break;
260 }
261 }
262 d.decimal_point += n_trailing_zeros as i32;
263 d.num_digits -= n_trailing_zeros;
264 d.decimal_point += d.num_digits as i32;
265 if d.num_digits > DecimalSeq::MAX_DIGITS {
266 d.truncated = true;
267 d.num_digits = DecimalSeq::MAX_DIGITS;
268 }
269 }
270
271 if let Some((&ch, s_next)) = s.split_first() {
272 if ch == b'e' || ch == b'E' {
273 s = s_next;
274 let mut neg_exp = false;
275 if let Some((&ch, s_next)) = s.split_first() {
276 neg_exp = ch == b'-';
277 if ch == b'-' || ch == b'+' {
278 s = s_next;
279 }
280 }
281 let mut exp_num = 0_i32;
282
283 s.parse_digits(|digit| {
284 if exp_num < 0x10000 {
285 exp_num = 10 * exp_num + digit as i32;
286 }
287 });
288
289 d.decimal_point += if neg_exp { -exp_num } else { exp_num };
290 }
291 }
292
293 for i in d.num_digits..DecimalSeq::MAX_DIGITS_WITHOUT_OVERFLOW {
294 d.digits[i] = 0;
295 }
296
297 d
298}
299
300fn number_of_digits_decimal_left_shift(d: &DecimalSeq, mut shift: usize) -> usize {
301 #[rustfmt::skip]
302 const TABLE: [u16; 65] = [
303 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024,
304 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, 0x3073, 0x3080, 0x388E, 0x389C,
305 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169,
306 0x5180, 0x5998, 0x59B0, 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B,
307 0x72AA, 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, 0x8C02,
308 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, 0x051C, 0x051C,
309 ];
310 #[rustfmt::skip]
311 const TABLE_POW5: [u8; 0x051C] = [
312 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9, 0, 6, 2, 5, 1,
313 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5,
314 1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2,
315 5, 1, 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6,
316 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
317 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9,
318 1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6,
319 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1,
320 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2,
321 3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
322 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5,
323 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2,
324 5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5,
325 3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4,
326 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
327 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0,
328 6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, 1, 2, 5, 3,
329 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8,
330 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4,
331 7, 0, 1, 7, 7, 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
332 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3, 7, 3, 6, 7, 5,
333 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7,
334 7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
335 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0,
336 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
337 8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1,
338 0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2,
339 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8,
340 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0,
341 6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
342 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2,
343 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4,
344 9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1,
345 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8,
346 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
347 8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1,
348 3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, 3, 8, 7, 7, 7, 8,
349 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0,
350 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2,
351 5, 5, 6, 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
352 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2,
353 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3,
354 8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2,
355 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
356 ];
357
358 shift &= 63;
359 let x_a = TABLE[shift];
360 let x_b = TABLE[shift + 1];
361 let num_new_digits = (x_a >> 11) as _;
362 let pow5_a = (0x7FF & x_a) as usize;
363 let pow5_b = (0x7FF & x_b) as usize;
364 let pow5 = &TABLE_POW5[pow5_a..];
365
366 for (i, &p5) in pow5.iter().enumerate().take(pow5_b - pow5_a) {
367 if i >= d.num_digits {
368 return num_new_digits - 1;
369 } else if d.digits[i] == p5 {
370 continue;
371 } else if d.digits[i] < p5 {
372 return num_new_digits - 1;
373 } else {
374 return num_new_digits;
375 }
376 }
377
378 num_new_digits
379}
380