//! Implementation of the Eisel-Lemire algorithm.

use crate::num::dec2flt::common::BiasedFp;
use crate::num::dec2flt::float::RawFloat;
use crate::num::dec2flt::table::{
    LARGEST_POWER_OF_FIVE, POWER_OF_FIVE_128, SMALLEST_POWER_OF_FIVE,
};

/// Compute w * 10^q using an extended-precision float representation.
///
/// Fast conversion of the significant digits and decimal exponent
/// of a float to an extended representation of a binary float. This
/// algorithm will accurately parse the vast majority of cases,
/// and uses a 128-bit representation (with a fallback 192-bit
/// representation).
///
/// This algorithm scales the significant digits by the decimal exponent
/// using pre-computed powers-of-5, and determines whether the
/// representation can be unambiguously rounded to the nearest
/// machine float. Near-halfway cases are not handled here,
/// and are signaled by a negative, biased binary exponent.
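///
/// Note that `w * 10^q` is factored as `(w * 5^q) * 2^q`: only the `5^q`
/// factor needs the pre-computed 128-bit powers-of-5 table, while the `2^q`
/// factor is folded directly into the binary exponent.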
///
/// The algorithm is described in detail in "Daniel Lemire, Number Parsing
/// at a Gigabyte per Second" in section 5, "Fast Algorithm", and
/// section 6, "Exact Numbers And Ties", available online:
/// <https://arxiv.org/abs/2101.11408.pdf>.
pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
    let fp_zero = BiasedFp::zero_pow2(0);
    let fp_inf = BiasedFp::zero_pow2(F::INFINITE_POWER);
    let fp_error = BiasedFp::zero_pow2(-1);

    // Short-circuit if the value can only be a literal 0 or infinity.
    if w == 0 || q < F::SMALLEST_POWER_OF_TEN as i64 {
        return fp_zero;
    } else if q > F::LARGEST_POWER_OF_TEN as i64 {
        return fp_inf;
    }
    // Normalize our significant digits, so the most-significant bit is set.
    let lz = w.leading_zeros();
    w <<= lz;
    let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
    if lo == 0xFFFF_FFFF_FFFF_FFFF {
        // We may have failed to approximate w * 5^q accurately with our
        // 128-bit value: a carry from the truncated lower bits could
        // round `hi` up over the half-way point, which can lead to
        // improper rounding of the float.
        //
        // However, this can only happen if q is outside the range [-27, 55];
        // inside that range the approximation is still good enough for
        // correct rounding. The upper bound of q is 55 because 5^55 < 2^128,
        // so the table entry for 5^q is exact; moreover, whenever 5^q < 2^64
        // the product fits in a single 128-bit multiplication, producing an
        // exact result. For negative exponents, rounding-to-even can only
        // occur if 5^-q < 2^64, i.e. q >= -27.
        //
        // For detailed explanations of rounding for negative exponents, see
        // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed
        // explanations of rounding for positive exponents, see
        // <https://arxiv.org/pdf/2101.11408.pdf#section.8>.
        let inside_safe_exponent = (q >= -27) && (q <= 55);
        if !inside_safe_exponent {
            return fp_error;
        }
    }
    let upperbit = (hi >> 63) as i32;
    let mut mantissa = hi >> (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3);
    let mut power2 = power(q as i32) + upperbit - lz as i32 - F::MINIMUM_EXPONENT;
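    // For example, parsing 1.0 (q = 0, original w = 1): lz = 63, upperbit = 0,
    // and power(0) = 63, so for f64 (whose MINIMUM_EXPONENT is -1023) we get
    // power2 = 63 + 0 - 63 - (-1023) = 1023, exactly the IEEE biased exponent
    // of 1.0.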
    if power2 <= 0 {
        if -power2 + 1 >= 64 {
            // Have more than 64 bits below the minimum exponent, must be 0.
            return fp_zero;
        }
        // Have a subnormal value.
        mantissa >>= -power2 + 1;
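        // Round up if the bit shifted out below is set, i.e. round to
        // nearest with ties rounding up.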
        mantissa += mantissa & 1;
        mantissa >>= 1;
        power2 = (mantissa >= (1_u64 << F::MANTISSA_EXPLICIT_BITS)) as i32;
        return BiasedFp { f: mantissa, e: power2 };
    }
    // Need to handle rounding ties. Normally, we need to round up,
    // but if we fall exactly halfway between two representable values
    // and the retained mantissa is even, we need to round down instead.
    //
    // This will only occur if:
    //  1. The lower 64 bits of the 128-bit representation are 0.
    //     I.e., 5^q fits in a single 64-bit word.
    //  2. The least-significant bit prior to the truncated mantissa is odd.
    //  3. All the bits truncated when shifting to mantissa bits + 1 are 0.
    //
    // That is, we may fall exactly halfway between two floats.
    if lo <= 1
        && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
        && q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
        && mantissa & 3 == 1
        && (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
    {
        // Zero the lowest bit, so we don't round up.
        mantissa &= !1_u64;
    }
    // Round-to-even, then shift the significant digits into place.
    mantissa += mantissa & 1;
    mantissa >>= 1;
    if mantissa >= (2_u64 << F::MANTISSA_EXPLICIT_BITS) {
        // Rounding up overflowed, so the carry bit is set. Set the
        // mantissa to 1 (only the implicit, hidden bit is set) and
        // increase the exponent.
        mantissa = 1_u64 << F::MANTISSA_EXPLICIT_BITS;
        power2 += 1;
    }
    // Zero out the hidden bit.
    mantissa &= !(1_u64 << F::MANTISSA_EXPLICIT_BITS);
    if power2 >= F::INFINITE_POWER {
        // Exponent is above largest normal value, must be infinite.
        return fp_inf;
    }
    BiasedFp { f: mantissa, e: power2 }
}

/// Calculate a base 2 exponent from a decimal exponent.
/// This uses a pre-computed integer approximation for
/// log2(10), where 217706 / 2^16 is accurate for the
/// entire range of decimal exponents handled by this parser.
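/// For example, `power(0) == 63` and `power(1) == 66`, i.e. roughly
/// `floor(q * log2(10)) + 63`.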
#[inline]
fn power(q: i32) -> i32 {
    (q.wrapping_mul(152_170 + 65536) >> 16) + 63
}

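/// Multiply two 64-bit integers, returning the full 128-bit result as
/// `(low, high)` 64-bit halves.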
#[inline]
fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
    let r: u128 = (a as u128) * (b as u128);
    (r as u64, (r >> 64) as u64)
}

/// Compute (or rather approximate) `w * 5^q` and return the result as a pair
/// of 64-bit words: the "high" part holds the most significant bits and the
/// "low" part the least significant bits.
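///
/// The full product of the 64-bit `w` and a 128-bit power-of-five entry is up
/// to 192 bits wide; only (an approximation of) its most significant 128 bits
/// is returned.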
fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) {
    debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64);
    debug_assert!(q <= LARGEST_POWER_OF_FIVE as i64);
    debug_assert!(precision <= 64);

    let mask = if precision < 64 {
        0xFFFF_FFFF_FFFF_FFFF_u64 >> precision
    } else {
        0xFFFF_FFFF_FFFF_FFFF_u64
    };
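    // For example, for f64 the precision is 52 + 3 = 55, so `mask` is 0x1FF:
    // the 9 low bits of the upper product word, i.e. the bits just below the
    // ones we need to get right.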

    // If 5^q < 2^64, then the multiplication always provides an exact value.
    // That means whenever we need to round ties to even, we always have
    // an exact value.
    let index = (q - SMALLEST_POWER_OF_FIVE as i64) as usize;
    // The table entry holds the normalized (and possibly truncated) 128-bit
    // value of 5^q, most-significant half first.
    let (hi5, lo5) = POWER_OF_FIVE_128[index];
    // Only need one multiplication as long as there is at least one zero bit
    // below the bits we need to be exact: the explicit mantissa bits, +1 for
    // the hidden bit, +1 to determine the rounding direction, and +1 in case
    // the computed product has a leading zero.
    let (mut first_lo, mut first_hi) = full_multiplication(w, hi5);
    if first_hi & mask == mask {
        // Need to do a second multiplication to get better precision
        // for the lower product. This will always be exact for
        // 0 <= q <= 55, since 5^55 < 2^128. If the addition below
        // wraps, we need to carry into the hi product.
        let (_, second_hi) = full_multiplication(w, lo5);
        first_lo = first_lo.wrapping_add(second_hi);
        if second_hi > first_lo {
            first_hi += 1;
        }
    }
    (first_lo, first_hi)
}
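
// A minimal illustrative sketch, not the crate's real test suite (the actual
// dec2flt tests live with the other `core` unit tests): it only demonstrates
// the `BiasedFp` values expected for a few simple inputs, assuming the `f64`
// constants defined in `crate::num::dec2flt::float`.
#[cfg(test)]
mod tests {
    use super::compute_float;
    use crate::num::dec2flt::float::RawFloat;

    #[test]
    fn easy_cases() {
        // 0 * 10^0 short-circuits to the zero representation.
        let fp = compute_float::<f64>(0, 0);
        assert_eq!((fp.f, fp.e), (0, 0));

        // 1 * 10^0 == 1.0f64: biased exponent 1023, explicit mantissa bits 0.
        let fp = compute_float::<f64>(0, 1);
        assert_eq!((fp.f, fp.e), (0, 1023));

        // 1 * 10^400 exceeds f64's largest decimal exponent, so the result
        // is the infinity representation.
        let fp = compute_float::<f64>(400, 1);
        assert_eq!((fp.f, fp.e), (0, f64::INFINITE_POWER));
    }
}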