1 | //! Arbitrary-precision decimal type used by fallback algorithms. |
2 | //! |
3 | //! This is only used if the fast-path (native floats) and |
4 | //! the Eisel-Lemire algorithm are unable to unambiguously |
5 | //! determine the float. |
6 | //! |
7 | //! The technique used is "Simple Decimal Conversion", developed |
8 | //! by Nigel Tao and Ken Thompson. A detailed description of the |
9 | //! algorithm can be found in "ParseNumberF64 by Simple Decimal Conversion", |
10 | //! available online: <https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html>. |
11 | |
12 | use crate::num::dec2flt::common::{ByteSlice, is_8digits}; |
13 | |
14 | /// A decimal floating-point number, represented as a sequence of decimal digits. |
15 | #[derive (Clone, Debug, PartialEq)] |
16 | pub struct DecimalSeq { |
17 | /// The number of significant digits in the decimal. |
18 | pub num_digits: usize, |
19 | /// The offset of the decimal point in the significant digits. |
20 | pub decimal_point: i32, |
21 | /// If the number of significant digits stored in the decimal is truncated. |
22 | pub truncated: bool, |
23 | /// Buffer of the raw digits, in the range [0, 9]. |
24 | pub digits: [u8; Self::MAX_DIGITS], |
25 | } |
26 | |
27 | impl Default for DecimalSeq { |
28 | fn default() -> Self { |
29 | Self { num_digits: 0, decimal_point: 0, truncated: false, digits: [0; Self::MAX_DIGITS] } |
30 | } |
31 | } |
32 | |
33 | impl DecimalSeq { |
34 | /// The maximum number of digits required to unambiguously round up to a 64-bit float. |
35 | /// |
36 | /// For an IEEE 754 binary64 float, this required 767 digits. So we store the max digits + 1. |
37 | /// |
38 | /// We can exactly represent a float in radix `b` from radix 2 if |
39 | /// `b` is divisible by 2. This function calculates the exact number of |
40 | /// digits required to exactly represent that float. |
41 | /// |
42 | /// According to the "Handbook of Floating Point Arithmetic", |
43 | /// for IEEE754, with `emin` being the min exponent, `p2` being the |
44 | /// precision, and `b` being the radix, the number of digits follows as: |
45 | /// |
46 | /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋` |
47 | /// |
48 | /// For f32, this follows as: |
49 | /// emin = -126 |
50 | /// p2 = 24 |
51 | /// |
52 | /// For f64, this follows as: |
53 | /// emin = -1022 |
54 | /// p2 = 53 |
55 | /// |
56 | /// In Python: |
57 | /// `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))` |
58 | pub const MAX_DIGITS: usize = 768; |
59 | |
60 | /// The max decimal digits that can be exactly represented in a 64-bit integer. |
61 | pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19; |
62 | pub(super) const DECIMAL_POINT_RANGE: i32 = 2047; |
63 | |
64 | /// Append a digit to the buffer if it fits. |
65 | // FIXME(tgross35): it may be better for this to return an option |
66 | // FIXME(tgross35): incrementing the digit counter even if we don't push anything |
67 | // seems incorrect. |
68 | pub(super) fn try_add_digit(&mut self, digit: u8) { |
69 | if self.num_digits < Self::MAX_DIGITS { |
70 | self.digits[self.num_digits] = digit; |
71 | } |
72 | self.num_digits += 1; |
73 | } |
74 | |
75 | /// Trim trailing zeros from the buffer. |
76 | // FIXME(tgross35): this could be `.rev().position()` if perf is okay |
77 | pub fn trim(&mut self) { |
78 | // All of the following calls to `DecimalSeq::trim` can't panic because: |
79 | // |
80 | // 1. `parse_decimal` sets `num_digits` to a max of `DecimalSeq::MAX_DIGITS`. |
81 | // 2. `right_shift` sets `num_digits` to `write_index`, which is bounded by `num_digits`. |
82 | // 3. `left_shift` `num_digits` to a max of `DecimalSeq::MAX_DIGITS`. |
83 | // |
84 | // Trim is only called in `right_shift` and `left_shift`. |
85 | debug_assert!(self.num_digits <= Self::MAX_DIGITS); |
86 | while self.num_digits != 0 && self.digits[self.num_digits - 1] == 0 { |
87 | self.num_digits -= 1; |
88 | } |
89 | } |
90 | |
91 | pub(super) fn round(&self) -> u64 { |
92 | if self.num_digits == 0 || self.decimal_point < 0 { |
93 | return 0; |
94 | } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 { |
95 | return 0xFFFF_FFFF_FFFF_FFFF_u64; |
96 | } |
97 | |
98 | let dp = self.decimal_point as usize; |
99 | let mut n = 0_u64; |
100 | |
101 | for i in 0..dp { |
102 | n *= 10; |
103 | if i < self.num_digits { |
104 | n += self.digits[i] as u64; |
105 | } |
106 | } |
107 | |
108 | let mut round_up = false; |
109 | |
110 | if dp < self.num_digits { |
111 | round_up = self.digits[dp] >= 5; |
112 | if self.digits[dp] == 5 && dp + 1 == self.num_digits { |
113 | round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0)) |
114 | } |
115 | } |
116 | |
117 | if round_up { |
118 | n += 1; |
119 | } |
120 | n |
121 | } |
122 | |
123 | /// Computes decimal * 2^shift. |
124 | pub(super) fn left_shift(&mut self, shift: usize) { |
125 | if self.num_digits == 0 { |
126 | return; |
127 | } |
128 | let num_new_digits = number_of_digits_decimal_left_shift(self, shift); |
129 | let mut read_index = self.num_digits; |
130 | let mut write_index = self.num_digits + num_new_digits; |
131 | let mut n = 0_u64; |
132 | |
133 | while read_index != 0 { |
134 | read_index -= 1; |
135 | write_index -= 1; |
136 | n += (self.digits[read_index] as u64) << shift; |
137 | let quotient = n / 10; |
138 | let remainder = n - (10 * quotient); |
139 | if write_index < Self::MAX_DIGITS { |
140 | self.digits[write_index] = remainder as u8; |
141 | } else if remainder > 0 { |
142 | self.truncated = true; |
143 | } |
144 | n = quotient; |
145 | } |
146 | |
147 | while n > 0 { |
148 | write_index -= 1; |
149 | let quotient = n / 10; |
150 | let remainder = n - (10 * quotient); |
151 | if write_index < Self::MAX_DIGITS { |
152 | self.digits[write_index] = remainder as u8; |
153 | } else if remainder > 0 { |
154 | self.truncated = true; |
155 | } |
156 | n = quotient; |
157 | } |
158 | |
159 | self.num_digits += num_new_digits; |
160 | |
161 | if self.num_digits > Self::MAX_DIGITS { |
162 | self.num_digits = Self::MAX_DIGITS; |
163 | } |
164 | |
165 | self.decimal_point += num_new_digits as i32; |
166 | self.trim(); |
167 | } |
168 | |
169 | /// Computes decimal * 2^-shift. |
170 | pub(super) fn right_shift(&mut self, shift: usize) { |
171 | let mut read_index = 0; |
172 | let mut write_index = 0; |
173 | let mut n = 0_u64; |
174 | while (n >> shift) == 0 { |
175 | if read_index < self.num_digits { |
176 | n = (10 * n) + self.digits[read_index] as u64; |
177 | read_index += 1; |
178 | } else if n == 0 { |
179 | return; |
180 | } else { |
181 | while (n >> shift) == 0 { |
182 | n *= 10; |
183 | read_index += 1; |
184 | } |
185 | break; |
186 | } |
187 | } |
188 | self.decimal_point -= read_index as i32 - 1; |
189 | if self.decimal_point < -Self::DECIMAL_POINT_RANGE { |
190 | // `self = Self::Default()`, but without the overhead of clearing `digits`. |
191 | self.num_digits = 0; |
192 | self.decimal_point = 0; |
193 | self.truncated = false; |
194 | return; |
195 | } |
196 | let mask = (1_u64 << shift) - 1; |
197 | while read_index < self.num_digits { |
198 | let new_digit = (n >> shift) as u8; |
199 | n = (10 * (n & mask)) + self.digits[read_index] as u64; |
200 | read_index += 1; |
201 | self.digits[write_index] = new_digit; |
202 | write_index += 1; |
203 | } |
204 | while n > 0 { |
205 | let new_digit = (n >> shift) as u8; |
206 | n = 10 * (n & mask); |
207 | if write_index < Self::MAX_DIGITS { |
208 | self.digits[write_index] = new_digit; |
209 | write_index += 1; |
210 | } else if new_digit > 0 { |
211 | self.truncated = true; |
212 | } |
213 | } |
214 | self.num_digits = write_index; |
215 | self.trim(); |
216 | } |
217 | } |
218 | |
219 | /// Parse a big integer representation of the float as a decimal. |
220 | pub fn parse_decimal_seq(mut s: &[u8]) -> DecimalSeq { |
221 | let mut d = DecimalSeq::default(); |
222 | let start = s; |
223 | |
224 | while let Some((&b'0' , s_next)) = s.split_first() { |
225 | s = s_next; |
226 | } |
227 | |
228 | s = s.parse_digits(|digit| d.try_add_digit(digit)); |
229 | |
230 | if let Some((b'.' , s_next)) = s.split_first() { |
231 | s = s_next; |
232 | let first = s; |
233 | // Skip leading zeros. |
234 | if d.num_digits == 0 { |
235 | while let Some((&b'0' , s_next)) = s.split_first() { |
236 | s = s_next; |
237 | } |
238 | } |
239 | while s.len() >= 8 && d.num_digits + 8 < DecimalSeq::MAX_DIGITS { |
240 | let v = s.read_u64(); |
241 | if !is_8digits(v) { |
242 | break; |
243 | } |
244 | d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030); |
245 | d.num_digits += 8; |
246 | s = &s[8..]; |
247 | } |
248 | s = s.parse_digits(|digit| d.try_add_digit(digit)); |
249 | d.decimal_point = s.len() as i32 - first.len() as i32; |
250 | } |
251 | |
252 | if d.num_digits != 0 { |
253 | // Ignore the trailing zeros if there are any |
254 | let mut n_trailing_zeros = 0; |
255 | for &c in start[..(start.len() - s.len())].iter().rev() { |
256 | if c == b'0' { |
257 | n_trailing_zeros += 1; |
258 | } else if c != b'.' { |
259 | break; |
260 | } |
261 | } |
262 | d.decimal_point += n_trailing_zeros as i32; |
263 | d.num_digits -= n_trailing_zeros; |
264 | d.decimal_point += d.num_digits as i32; |
265 | if d.num_digits > DecimalSeq::MAX_DIGITS { |
266 | d.truncated = true; |
267 | d.num_digits = DecimalSeq::MAX_DIGITS; |
268 | } |
269 | } |
270 | |
271 | if let Some((&ch, s_next)) = s.split_first() { |
272 | if ch == b'e' || ch == b'E' { |
273 | s = s_next; |
274 | let mut neg_exp = false; |
275 | if let Some((&ch, s_next)) = s.split_first() { |
276 | neg_exp = ch == b'-' ; |
277 | if ch == b'-' || ch == b'+' { |
278 | s = s_next; |
279 | } |
280 | } |
281 | let mut exp_num = 0_i32; |
282 | |
283 | s.parse_digits(|digit| { |
284 | if exp_num < 0x10000 { |
285 | exp_num = 10 * exp_num + digit as i32; |
286 | } |
287 | }); |
288 | |
289 | d.decimal_point += if neg_exp { -exp_num } else { exp_num }; |
290 | } |
291 | } |
292 | |
293 | for i in d.num_digits..DecimalSeq::MAX_DIGITS_WITHOUT_OVERFLOW { |
294 | d.digits[i] = 0; |
295 | } |
296 | |
297 | d |
298 | } |
299 | |
300 | fn number_of_digits_decimal_left_shift(d: &DecimalSeq, mut shift: usize) -> usize { |
301 | #[rustfmt::skip] |
302 | const TABLE: [u16; 65] = [ |
303 | 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024, |
304 | 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, 0x3073, 0x3080, 0x388E, 0x389C, |
305 | 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, |
306 | 0x5180, 0x5998, 0x59B0, 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, |
307 | 0x72AA, 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, 0x8C02, |
308 | 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, 0x051C, 0x051C, |
309 | ]; |
310 | #[rustfmt::skip] |
311 | const TABLE_POW5: [u8; 0x051C] = [ |
312 | 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9, 0, 6, 2, 5, 1, |
313 | 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, |
314 | 1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, |
315 | 5, 1, 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6, |
316 | 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, |
317 | 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, |
318 | 1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6, |
319 | 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1, |
320 | 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, |
321 | 3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6, |
322 | 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5, |
323 | 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, |
324 | 5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, |
325 | 3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4, |
326 | 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, |
327 | 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, |
328 | 6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, 1, 2, 5, 3, |
329 | 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, |
330 | 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, |
331 | 7, 0, 1, 7, 7, 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5, |
332 | 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3, 7, 3, 6, 7, 5, |
333 | 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, |
334 | 7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, |
335 | 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0, |
336 | 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, |
337 | 8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, |
338 | 0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2, |
339 | 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, |
340 | 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, |
341 | 6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3, |
342 | 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, |
343 | 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, |
344 | 9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1, |
345 | 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8, |
346 | 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, |
347 | 8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, |
348 | 3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, 3, 8, 7, 7, 7, 8, |
349 | 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, |
350 | 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, |
351 | 5, 5, 6, 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1, |
352 | 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, |
353 | 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, |
354 | 8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2, |
355 | 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, |
356 | ]; |
357 | |
358 | shift &= 63; |
359 | let x_a = TABLE[shift]; |
360 | let x_b = TABLE[shift + 1]; |
361 | let num_new_digits = (x_a >> 11) as _; |
362 | let pow5_a = (0x7FF & x_a) as usize; |
363 | let pow5_b = (0x7FF & x_b) as usize; |
364 | let pow5 = &TABLE_POW5[pow5_a..]; |
365 | |
366 | for (i, &p5) in pow5.iter().enumerate().take(pow5_b - pow5_a) { |
367 | if i >= d.num_digits { |
368 | return num_new_digits - 1; |
369 | } else if d.digits[i] == p5 { |
370 | continue; |
371 | } else if d.digits[i] < p5 { |
372 | return num_new_digits - 1; |
373 | } else { |
374 | return num_new_digits; |
375 | } |
376 | } |
377 | |
378 | num_new_digits |
379 | } |
380 | |