| 1 | //! Implementation of the Eisel-Lemire algorithm. |
| 2 | //! |
| 3 | //! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust), |
| 4 | //! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust. |
| 5 | |
| 6 | #![cfg (not(feature = "compact" ))] |
| 7 | #![doc (hidden)] |
| 8 | |
| 9 | use crate::extended_float::ExtendedFloat; |
| 10 | use crate::num::Float; |
| 11 | use crate::number::Number; |
| 12 | use crate::table::{LARGEST_POWER_OF_FIVE, POWER_OF_FIVE_128, SMALLEST_POWER_OF_FIVE}; |
| 13 | |
| 14 | /// Ensure truncation of digits doesn't affect our computation, by doing 2 passes. |
| 15 | #[inline ] |
| 16 | pub fn lemire<F: Float>(num: &Number) -> ExtendedFloat { |
| 17 | // If significant digits were truncated, then we can have rounding error |
| 18 | // only if `mantissa + 1` produces a different result. We also avoid |
| 19 | // redundantly using the Eisel-Lemire algorithm if it was unable to |
| 20 | // correctly round on the first pass. |
| 21 | let mut fp: ExtendedFloat = compute_float::<F>(q:num.exponent, w:num.mantissa); |
| 22 | if num.many_digits && fp.exp >= 0 && fp != compute_float::<F>(q:num.exponent, w:num.mantissa + 1) { |
| 23 | // Need to re-calculate, since the previous values are rounded |
| 24 | // when the slow path algorithm expects a normalized extended float. |
| 25 | fp = compute_error::<F>(q:num.exponent, w:num.mantissa); |
| 26 | } |
| 27 | fp |
| 28 | } |
| 29 | |
| 30 | /// Compute a float using an extended-precision representation. |
| 31 | /// |
| 32 | /// Fast conversion of a the significant digits and decimal exponent |
| 33 | /// a float to a extended representation with a binary float. This |
| 34 | /// algorithm will accurately parse the vast majority of cases, |
| 35 | /// and uses a 128-bit representation (with a fallback 192-bit |
| 36 | /// representation). |
| 37 | /// |
| 38 | /// This algorithm scales the exponent by the decimal exponent |
| 39 | /// using pre-computed powers-of-5, and calculates if the |
| 40 | /// representation can be unambiguously rounded to the nearest |
| 41 | /// machine float. Near-halfway cases are not handled here, |
| 42 | /// and are represented by a negative, biased binary exponent. |
| 43 | /// |
| 44 | /// The algorithm is described in detail in "Daniel Lemire, Number Parsing |
| 45 | /// at a Gigabyte per Second" in section 5, "Fast Algorithm", and |
| 46 | /// section 6, "Exact Numbers And Ties", available online: |
| 47 | /// <https://arxiv.org/abs/2101.11408.pdf>. |
| 48 | pub fn compute_float<F: Float>(q: i32, mut w: u64) -> ExtendedFloat { |
| 49 | let fp_zero = ExtendedFloat { |
| 50 | mant: 0, |
| 51 | exp: 0, |
| 52 | }; |
| 53 | let fp_inf = ExtendedFloat { |
| 54 | mant: 0, |
| 55 | exp: F::INFINITE_POWER, |
| 56 | }; |
| 57 | |
| 58 | // Short-circuit if the value can only be a literal 0 or infinity. |
| 59 | if w == 0 || q < F::SMALLEST_POWER_OF_TEN { |
| 60 | return fp_zero; |
| 61 | } else if q > F::LARGEST_POWER_OF_TEN { |
| 62 | return fp_inf; |
| 63 | } |
| 64 | // Normalize our significant digits, so the most-significant bit is set. |
| 65 | let lz = w.leading_zeros() as i32; |
| 66 | w <<= lz; |
| 67 | let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3); |
| 68 | if lo == 0xFFFF_FFFF_FFFF_FFFF { |
| 69 | // If we have failed to approximate w x 5^-q with our 128-bit value. |
| 70 | // Since the addition of 1 could lead to an overflow which could then |
| 71 | // round up over the half-way point, this can lead to improper rounding |
| 72 | // of a float. |
| 73 | // |
| 74 | // However, this can only occur if q ∈ [-27, 55]. The upper bound of q |
| 75 | // is 55 because 5^55 < 2^128, however, this can only happen if 5^q > 2^64, |
| 76 | // since otherwise the product can be represented in 64-bits, producing |
| 77 | // an exact result. For negative exponents, rounding-to-even can |
| 78 | // only occur if 5^-q < 2^64. |
| 79 | // |
| 80 | // For detailed explanations of rounding for negative exponents, see |
| 81 | // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed |
| 82 | // explanations of rounding for positive exponents, see |
| 83 | // <https://arxiv.org/pdf/2101.11408.pdf#section.8>. |
| 84 | let inside_safe_exponent = (q >= -27) && (q <= 55); |
| 85 | if !inside_safe_exponent { |
| 86 | return compute_error_scaled::<F>(q, hi, lz); |
| 87 | } |
| 88 | } |
| 89 | let upperbit = (hi >> 63) as i32; |
| 90 | let mut mantissa = hi >> (upperbit + 64 - F::MANTISSA_SIZE - 3); |
| 91 | let mut power2 = power(q) + upperbit - lz - F::MINIMUM_EXPONENT; |
| 92 | if power2 <= 0 { |
| 93 | if -power2 + 1 >= 64 { |
| 94 | // Have more than 64 bits below the minimum exponent, must be 0. |
| 95 | return fp_zero; |
| 96 | } |
| 97 | // Have a subnormal value. |
| 98 | mantissa >>= -power2 + 1; |
| 99 | mantissa += mantissa & 1; |
| 100 | mantissa >>= 1; |
| 101 | power2 = (mantissa >= (1_u64 << F::MANTISSA_SIZE)) as i32; |
| 102 | return ExtendedFloat { |
| 103 | mant: mantissa, |
| 104 | exp: power2, |
| 105 | }; |
| 106 | } |
| 107 | // Need to handle rounding ties. Normally, we need to round up, |
| 108 | // but if we fall right in between and and we have an even basis, we |
| 109 | // need to round down. |
| 110 | // |
| 111 | // This will only occur if: |
| 112 | // 1. The lower 64 bits of the 128-bit representation is 0. |
| 113 | // IE, 5^q fits in single 64-bit word. |
| 114 | // 2. The least-significant bit prior to truncated mantissa is odd. |
| 115 | // 3. All the bits truncated when shifting to mantissa bits + 1 are 0. |
| 116 | // |
| 117 | // Or, we may fall between two floats: we are exactly halfway. |
| 118 | if lo <= 1 |
| 119 | && q >= F::MIN_EXPONENT_ROUND_TO_EVEN |
| 120 | && q <= F::MAX_EXPONENT_ROUND_TO_EVEN |
| 121 | && mantissa & 3 == 1 |
| 122 | && (mantissa << (upperbit + 64 - F::MANTISSA_SIZE - 3)) == hi |
| 123 | { |
| 124 | // Zero the lowest bit, so we don't round up. |
| 125 | mantissa &= !1_u64; |
| 126 | } |
| 127 | // Round-to-even, then shift the significant digits into place. |
| 128 | mantissa += mantissa & 1; |
| 129 | mantissa >>= 1; |
| 130 | if mantissa >= (2_u64 << F::MANTISSA_SIZE) { |
| 131 | // Rounding up overflowed, so the carry bit is set. Set the |
| 132 | // mantissa to 1 (only the implicit, hidden bit is set) and |
| 133 | // increase the exponent. |
| 134 | mantissa = 1_u64 << F::MANTISSA_SIZE; |
| 135 | power2 += 1; |
| 136 | } |
| 137 | // Zero out the hidden bit. |
| 138 | mantissa &= !(1_u64 << F::MANTISSA_SIZE); |
| 139 | if power2 >= F::INFINITE_POWER { |
| 140 | // Exponent is above largest normal value, must be infinite. |
| 141 | return fp_inf; |
| 142 | } |
| 143 | ExtendedFloat { |
| 144 | mant: mantissa, |
| 145 | exp: power2, |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | /// Fallback algorithm to calculate the non-rounded representation. |
| 150 | /// This calculates the extended representation, and then normalizes |
| 151 | /// the resulting representation, so the high bit is set. |
| 152 | #[inline ] |
| 153 | pub fn compute_error<F: Float>(q: i32, mut w: u64) -> ExtendedFloat { |
| 154 | let lz: i32 = w.leading_zeros() as i32; |
| 155 | w <<= lz; |
| 156 | let hi: u64 = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3).1; |
| 157 | compute_error_scaled::<F>(q, w:hi, lz) |
| 158 | } |
| 159 | |
| 160 | /// Compute the error from a mantissa scaled to the exponent. |
| 161 | #[inline ] |
| 162 | pub fn compute_error_scaled<F: Float>(q: i32, mut w: u64, lz: i32) -> ExtendedFloat { |
| 163 | // Want to normalize the float, but this is faster than ctlz on most architectures. |
| 164 | let hilz: i32 = (w >> 63) as i32 ^ 1; |
| 165 | w <<= hilz; |
| 166 | let power2: i32 = power(q as i32) + F::EXPONENT_BIAS - hilz - lz - 62; |
| 167 | |
| 168 | ExtendedFloat { |
| 169 | mant: w, |
| 170 | exp: power2 + F::INVALID_FP, |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | /// Calculate a base 2 exponent from a decimal exponent. |
| 175 | /// This uses a pre-computed integer approximation for |
| 176 | /// log2(10), where 217706 / 2^16 is accurate for the |
| 177 | /// entire range of non-finite decimal exponents. |
| 178 | #[inline ] |
| 179 | fn power(q: i32) -> i32 { |
| 180 | (q.wrapping_mul(152_170 + 65536) >> 16) + 63 |
| 181 | } |
| 182 | |
| 183 | #[inline ] |
| 184 | fn full_multiplication(a: u64, b: u64) -> (u64, u64) { |
| 185 | let r: u128 = (a as u128) * (b as u128); |
| 186 | (r as u64, (r >> 64) as u64) |
| 187 | } |
| 188 | |
| 189 | // This will compute or rather approximate w * 5**q and return a pair of 64-bit words |
| 190 | // approximating the result, with the "high" part corresponding to the most significant |
| 191 | // bits and the low part corresponding to the least significant bits. |
| 192 | fn compute_product_approx(q: i32, w: u64, precision: usize) -> (u64, u64) { |
| 193 | debug_assert!(q >= SMALLEST_POWER_OF_FIVE); |
| 194 | debug_assert!(q <= LARGEST_POWER_OF_FIVE); |
| 195 | debug_assert!(precision <= 64); |
| 196 | |
| 197 | let mask = if precision < 64 { |
| 198 | 0xFFFF_FFFF_FFFF_FFFF_u64 >> precision |
| 199 | } else { |
| 200 | 0xFFFF_FFFF_FFFF_FFFF_u64 |
| 201 | }; |
| 202 | |
| 203 | // 5^q < 2^64, then the multiplication always provides an exact value. |
| 204 | // That means whenever we need to round ties to even, we always have |
| 205 | // an exact value. |
| 206 | let index = (q - SMALLEST_POWER_OF_FIVE) as usize; |
| 207 | let (lo5, hi5) = POWER_OF_FIVE_128[index]; |
| 208 | // Only need one multiplication as long as there is 1 zero but |
| 209 | // in the explicit mantissa bits, +1 for the hidden bit, +1 to |
| 210 | // determine the rounding direction, +1 for if the computed |
| 211 | // product has a leading zero. |
| 212 | let (mut first_lo, mut first_hi) = full_multiplication(w, lo5); |
| 213 | if first_hi & mask == mask { |
| 214 | // Need to do a second multiplication to get better precision |
| 215 | // for the lower product. This will always be exact |
| 216 | // where q is < 55, since 5^55 < 2^128. If this wraps, |
| 217 | // then we need to need to round up the hi product. |
| 218 | let (_, second_hi) = full_multiplication(w, hi5); |
| 219 | first_lo = first_lo.wrapping_add(second_hi); |
| 220 | if second_hi > first_lo { |
| 221 | first_hi += 1; |
| 222 | } |
| 223 | } |
| 224 | (first_lo, first_hi) |
| 225 | } |
| 226 | |