1 | //! Slow, fallback algorithm for cases the Eisel-Lemire algorithm cannot round. |
2 | |
3 | use crate::num::dec2flt::common::BiasedFp; |
4 | use crate::num::dec2flt::decimal::{parse_decimal, Decimal}; |
5 | use crate::num::dec2flt::float::RawFloat; |
6 | |
7 | /// Parse the significant digits and biased, binary exponent of a float. |
8 | /// |
9 | /// This is a fallback algorithm that uses a big-integer representation |
10 | /// of the float, and therefore is considerably slower than faster |
11 | /// approximations. However, it will always determine how to round |
12 | /// the significant digits to the nearest machine float, allowing |
13 | /// use to handle near half-way cases. |
14 | /// |
15 | /// Near half-way cases are halfway between two consecutive machine floats. |
16 | /// For example, the float `16777217.0` has a bitwise representation of |
17 | /// `100000000000000000000000 1`. Rounding to a single-precision float, |
18 | /// the trailing `1` is truncated. Using round-nearest, tie-even, any |
19 | /// value above `16777217.0` must be rounded up to `16777218.0`, while |
20 | /// any value before or equal to `16777217.0` must be rounded down |
21 | /// to `16777216.0`. These near-halfway conversions therefore may require |
22 | /// a large number of digits to unambiguously determine how to round. |
23 | /// |
24 | /// The algorithms described here are based on "Processing Long Numbers Quickly", |
25 | /// available here: <https://arxiv.org/pdf/2101.11408.pdf#section.11>. |
26 | pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp { |
27 | const MAX_SHIFT: usize = 60; |
28 | const NUM_POWERS: usize = 19; |
29 | const POWERS: [u8; 19] = |
30 | [0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59]; |
31 | |
32 | let get_shift = |n| { |
33 | if n < NUM_POWERS { POWERS[n] as usize } else { MAX_SHIFT } |
34 | }; |
35 | |
36 | let fp_zero = BiasedFp::zero_pow2(0); |
37 | let fp_inf = BiasedFp::zero_pow2(F::INFINITE_POWER); |
38 | |
39 | let mut d = parse_decimal(s); |
40 | |
41 | // Short-circuit if the value can only be a literal 0 or infinity. |
42 | if d.num_digits == 0 || d.decimal_point < -324 { |
43 | return fp_zero; |
44 | } else if d.decimal_point >= 310 { |
45 | return fp_inf; |
46 | } |
47 | let mut exp2 = 0_i32; |
48 | // Shift right toward (1/2 ... 1]. |
49 | while d.decimal_point > 0 { |
50 | let n = d.decimal_point as usize; |
51 | let shift = get_shift(n); |
52 | d.right_shift(shift); |
53 | if d.decimal_point < -Decimal::DECIMAL_POINT_RANGE { |
54 | return fp_zero; |
55 | } |
56 | exp2 += shift as i32; |
57 | } |
58 | // Shift left toward (1/2 ... 1]. |
59 | while d.decimal_point <= 0 { |
60 | let shift = if d.decimal_point == 0 { |
61 | match d.digits[0] { |
62 | digit if digit >= 5 => break, |
63 | 0 | 1 => 2, |
64 | _ => 1, |
65 | } |
66 | } else { |
67 | get_shift((-d.decimal_point) as _) |
68 | }; |
69 | d.left_shift(shift); |
70 | if d.decimal_point > Decimal::DECIMAL_POINT_RANGE { |
71 | return fp_inf; |
72 | } |
73 | exp2 -= shift as i32; |
74 | } |
75 | // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. |
76 | exp2 -= 1; |
77 | while (F::MINIMUM_EXPONENT + 1) > exp2 { |
78 | let mut n = ((F::MINIMUM_EXPONENT + 1) - exp2) as usize; |
79 | if n > MAX_SHIFT { |
80 | n = MAX_SHIFT; |
81 | } |
82 | d.right_shift(n); |
83 | exp2 += n as i32; |
84 | } |
85 | if (exp2 - F::MINIMUM_EXPONENT) >= F::INFINITE_POWER { |
86 | return fp_inf; |
87 | } |
88 | // Shift the decimal to the hidden bit, and then round the value |
89 | // to get the high mantissa+1 bits. |
90 | d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1); |
91 | let mut mantissa = d.round(); |
92 | if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) { |
93 | // Rounding up overflowed to the carry bit, need to |
94 | // shift back to the hidden bit. |
95 | d.right_shift(1); |
96 | exp2 += 1; |
97 | mantissa = d.round(); |
98 | if (exp2 - F::MINIMUM_EXPONENT) >= F::INFINITE_POWER { |
99 | return fp_inf; |
100 | } |
101 | } |
102 | let mut power2 = exp2 - F::MINIMUM_EXPONENT; |
103 | if mantissa < (1_u64 << F::MANTISSA_EXPLICIT_BITS) { |
104 | power2 -= 1; |
105 | } |
106 | // Zero out all the bits above the explicit mantissa bits. |
107 | mantissa &= (1_u64 << F::MANTISSA_EXPLICIT_BITS) - 1; |
108 | BiasedFp { f: mantissa, e: power2 } |
109 | } |
110 | |