1 | //! Slow, fallback cases where we cannot unambiguously round a float. |
2 | //! |
3 | //! This occurs when we cannot determine the exact representation using |
4 | //! both the fast path (native) cases nor the Lemire/Bellerophon algorithms, |
5 | //! and therefore must fallback to a slow, arbitrary-precision representation. |
6 | |
7 | #![doc (hidden)] |
8 | |
9 | use crate::bigint::{Bigint, Limb, LIMB_BITS}; |
10 | use crate::extended_float::{extended_to_float, ExtendedFloat}; |
11 | use crate::num::Float; |
12 | use crate::number::Number; |
13 | use crate::rounding::{round, round_down, round_nearest_tie_even}; |
14 | use core::cmp; |
15 | |
16 | // ALGORITHM |
17 | // --------- |
18 | |
19 | /// Parse the significant digits and biased, binary exponent of a float. |
20 | /// |
21 | /// This is a fallback algorithm that uses a big-integer representation |
22 | /// of the float, and therefore is considerably slower than faster |
23 | /// approximations. However, it will always determine how to round |
24 | /// the significant digits to the nearest machine float, allowing |
25 | /// use to handle near half-way cases. |
26 | /// |
27 | /// Near half-way cases are halfway between two consecutive machine floats. |
28 | /// For example, the float `16777217.0` has a bitwise representation of |
29 | /// `100000000000000000000000 1`. Rounding to a single-precision float, |
30 | /// the trailing `1` is truncated. Using round-nearest, tie-even, any |
31 | /// value above `16777217.0` must be rounded up to `16777218.0`, while |
32 | /// any value before or equal to `16777217.0` must be rounded down |
33 | /// to `16777216.0`. These near-halfway conversions therefore may require |
34 | /// a large number of digits to unambiguously determine how to round. |
35 | #[inline ] |
36 | pub fn slow<'a, F, Iter1, Iter2>( |
37 | num: Number, |
38 | fp: ExtendedFloat, |
39 | integer: Iter1, |
40 | fraction: Iter2, |
41 | ) -> ExtendedFloat |
42 | where |
43 | F: Float, |
44 | Iter1: Iterator<Item = &'a u8> + Clone, |
45 | Iter2: Iterator<Item = &'a u8> + Clone, |
46 | { |
47 | // Ensure our preconditions are valid: |
48 | // 1. The significant digits are not shifted into place. |
49 | debug_assert!(fp.mant & (1 << 63) != 0); |
50 | |
51 | // This assumes the sign bit has already been parsed, and we're |
52 | // starting with the integer digits, and the float format has been |
53 | // correctly validated. |
54 | let sci_exp: i32 = scientific_exponent(&num); |
55 | |
56 | // We have 2 major algorithms we use for this: |
57 | // 1. An algorithm with a finite number of digits and a positive exponent. |
58 | // 2. An algorithm with a finite number of digits and a negative exponent. |
59 | let (bigmant: Bigint, digits: usize) = parse_mantissa(integer, fraction, F::MAX_DIGITS); |
60 | let exponent: i32 = sci_exp + 1 - digits as i32; |
61 | if exponent >= 0 { |
62 | positive_digit_comp::<F>(bigmant, exponent) |
63 | } else { |
64 | negative_digit_comp::<F>(bigmant, fp, exponent) |
65 | } |
66 | } |
67 | |
68 | /// Generate the significant digits with a positive exponent relative to mantissa. |
69 | pub fn positive_digit_comp<F: Float>(mut bigmant: Bigint, exponent: i32) -> ExtendedFloat { |
70 | // Simple, we just need to multiply by the power of the radix. |
71 | // Now, we can calculate the mantissa and the exponent from this. |
72 | // The binary exponent is the binary exponent for the mantissa |
73 | // shifted to the hidden bit. |
74 | bigmant.pow(10, exponent as u32).unwrap(); |
75 | |
76 | // Get the exact representation of the float from the big integer. |
77 | // hi64 checks **all** the remaining bits after the mantissa, |
78 | // so it will check if **any** truncated digits exist. |
79 | let (mant, is_truncated) = bigmant.hi64(); |
80 | let exp = bigmant.bit_length() as i32 - 64 + F::EXPONENT_BIAS; |
81 | let mut fp = ExtendedFloat { |
82 | mant, |
83 | exp, |
84 | }; |
85 | |
86 | // Shift the digits into position and determine if we need to round-up. |
87 | round::<F, _>(&mut fp, |f, s| { |
88 | round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| { |
89 | is_above || (is_halfway && is_truncated) || (is_odd && is_halfway) |
90 | }); |
91 | }); |
92 | fp |
93 | } |
94 | |
95 | /// Generate the significant digits with a negative exponent relative to mantissa. |
96 | /// |
97 | /// This algorithm is quite simple: we have the significant digits `m1 * b^N1`, |
98 | /// where `m1` is the bigint mantissa, `b` is the radix, and `N1` is the radix |
99 | /// exponent. We then calculate the theoretical representation of `b+h`, which |
100 | /// is `m2 * 2^N2`, where `m2` is the bigint mantissa and `N2` is the binary |
101 | /// exponent. If we had infinite, efficient floating precision, this would be |
102 | /// equal to `m1 / b^-N1` and then compare it to `m2 * 2^N2`. |
103 | /// |
104 | /// Since we cannot divide and keep precision, we must multiply the other: |
105 | /// if we want to do `m1 / b^-N1 >= m2 * 2^N2`, we can do |
106 | /// `m1 >= m2 * b^-N1 * 2^N2` Going to the decimal case, we can show and example |
107 | /// and simplify this further: `m1 >= m2 * 2^N2 * 10^-N1`. Since we can remove |
108 | /// a power-of-two, this is `m1 >= m2 * 2^(N2 - N1) * 5^-N1`. Therefore, if |
109 | /// `N2 - N1 > 0`, we need have `m1 >= m2 * 2^(N2 - N1) * 5^-N1`, otherwise, |
110 | /// we have `m1 * 2^(N1 - N2) >= m2 * 5^-N1`, where the resulting exponents |
111 | /// are all positive. |
112 | /// |
113 | /// This allows us to compare both floats using integers efficiently |
114 | /// without any loss of precision. |
115 | #[allow (clippy::comparison_chain)] |
116 | pub fn negative_digit_comp<F: Float>( |
117 | bigmant: Bigint, |
118 | mut fp: ExtendedFloat, |
119 | exponent: i32, |
120 | ) -> ExtendedFloat { |
121 | // Ensure our preconditions are valid: |
122 | // 1. The significant digits are not shifted into place. |
123 | debug_assert!(fp.mant & (1 << 63) != 0); |
124 | |
125 | // Get the significant digits and radix exponent for the real digits. |
126 | let mut real_digits = bigmant; |
127 | let real_exp = exponent; |
128 | debug_assert!(real_exp < 0); |
129 | |
130 | // Round down our extended-precision float and calculate `b`. |
131 | let mut b = fp; |
132 | round::<F, _>(&mut b, round_down); |
133 | let b = extended_to_float::<F>(b); |
134 | |
135 | // Get the significant digits and the binary exponent for `b+h`. |
136 | let theor = bh(b); |
137 | let mut theor_digits = Bigint::from_u64(theor.mant); |
138 | let theor_exp = theor.exp; |
139 | |
140 | // We need to scale the real digits and `b+h` digits to be the same |
141 | // order. We currently have `real_exp`, in `radix`, that needs to be |
142 | // shifted to `theor_digits` (since it is negative), and `theor_exp` |
143 | // to either `theor_digits` or `real_digits` as a power of 2 (since it |
144 | // may be positive or negative). Try to remove as many powers of 2 |
145 | // as possible. All values are relative to `theor_digits`, that is, |
146 | // reflect the power you need to multiply `theor_digits` by. |
147 | // |
148 | // Both are on opposite-sides of equation, can factor out a |
149 | // power of two. |
150 | // |
151 | // Example: 10^-10, 2^-10 -> ( 0, 10, 0) |
152 | // Example: 10^-10, 2^-15 -> (-5, 10, 0) |
153 | // Example: 10^-10, 2^-5 -> ( 5, 10, 0) |
154 | // Example: 10^-10, 2^5 -> (15, 10, 0) |
155 | let binary_exp = theor_exp - real_exp; |
156 | let halfradix_exp = -real_exp; |
157 | if halfradix_exp != 0 { |
158 | theor_digits.pow(5, halfradix_exp as u32).unwrap(); |
159 | } |
160 | if binary_exp > 0 { |
161 | theor_digits.pow(2, binary_exp as u32).unwrap(); |
162 | } else if binary_exp < 0 { |
163 | real_digits.pow(2, (-binary_exp) as u32).unwrap(); |
164 | } |
165 | |
166 | // Compare our theoretical and real digits and round nearest, tie even. |
167 | let ord = real_digits.data.cmp(&theor_digits.data); |
168 | round::<F, _>(&mut fp, |f, s| { |
169 | round_nearest_tie_even(f, s, |is_odd, _, _| { |
170 | // Can ignore `is_halfway` and `is_above`, since those were |
171 | // calculates using less significant digits. |
172 | match ord { |
173 | cmp::Ordering::Greater => true, |
174 | cmp::Ordering::Less => false, |
175 | cmp::Ordering::Equal if is_odd => true, |
176 | cmp::Ordering::Equal => false, |
177 | } |
178 | }); |
179 | }); |
180 | fp |
181 | } |
182 | |
183 | /// Add a digit to the temporary value. |
184 | macro_rules! add_digit { |
185 | ($c:ident, $value:ident, $counter:ident, $count:ident) => {{ |
186 | let digit = $c - b'0' ; |
187 | $value *= 10 as Limb; |
188 | $value += digit as Limb; |
189 | |
190 | // Increment our counters. |
191 | $counter += 1; |
192 | $count += 1; |
193 | }}; |
194 | } |
195 | |
196 | /// Add a temporary value to our mantissa. |
197 | macro_rules! add_temporary { |
198 | // Multiply by the small power and add the native value. |
199 | (@mul $result:ident, $power:expr, $value:expr) => { |
200 | $result.data.mul_small($power).unwrap(); |
201 | $result.data.add_small($value).unwrap(); |
202 | }; |
203 | |
204 | // # Safety |
205 | // |
206 | // Safe is `counter <= step`, or smaller than the table size. |
207 | ($format:ident, $result:ident, $counter:ident, $value:ident) => { |
208 | if $counter != 0 { |
209 | // SAFETY: safe, since `counter <= step`, or smaller than the table size. |
210 | let small_power = unsafe { f64::int_pow_fast_path($counter, 10) }; |
211 | add_temporary!(@mul $result, small_power as Limb, $value); |
212 | $counter = 0; |
213 | $value = 0; |
214 | } |
215 | }; |
216 | |
217 | // Add a temporary where we won't read the counter results internally. |
218 | // |
219 | // # Safety |
220 | // |
221 | // Safe is `counter <= step`, or smaller than the table size. |
222 | (@end $format:ident, $result:ident, $counter:ident, $value:ident) => { |
223 | if $counter != 0 { |
224 | // SAFETY: safe, since `counter <= step`, or smaller than the table size. |
225 | let small_power = unsafe { f64::int_pow_fast_path($counter, 10) }; |
226 | add_temporary!(@mul $result, small_power as Limb, $value); |
227 | } |
228 | }; |
229 | |
230 | // Add the maximum native value. |
231 | (@max $format:ident, $result:ident, $counter:ident, $value:ident, $max:ident) => { |
232 | add_temporary!(@mul $result, $max, $value); |
233 | $counter = 0; |
234 | $value = 0; |
235 | }; |
236 | } |
237 | |
238 | /// Round-up a truncated value. |
239 | macro_rules! round_up_truncated { |
240 | ($format:ident, $result:ident, $count:ident) => {{ |
241 | // Need to round-up. |
242 | // Can't just add 1, since this can accidentally round-up |
243 | // values to a halfway point, which can cause invalid results. |
244 | add_temporary!(@mul $result, 10, 1); |
245 | $count += 1; |
246 | }}; |
247 | } |
248 | |
249 | /// Check and round-up the fraction if any non-zero digits exist. |
250 | macro_rules! round_up_nonzero { |
251 | ($format:ident, $iter:expr, $result:ident, $count:ident) => {{ |
252 | for &digit in $iter { |
253 | if digit != b'0' { |
254 | round_up_truncated!($format, $result, $count); |
255 | return ($result, $count); |
256 | } |
257 | } |
258 | }}; |
259 | } |
260 | |
261 | /// Parse the full mantissa into a big integer. |
262 | /// |
263 | /// Returns the parsed mantissa and the number of digits in the mantissa. |
264 | /// The max digits is the maximum number of digits plus one. |
265 | pub fn parse_mantissa<'a, Iter1, Iter2>( |
266 | mut integer: Iter1, |
267 | mut fraction: Iter2, |
268 | max_digits: usize, |
269 | ) -> (Bigint, usize) |
270 | where |
271 | Iter1: Iterator<Item = &'a u8> + Clone, |
272 | Iter2: Iterator<Item = &'a u8> + Clone, |
273 | { |
274 | // Iteratively process all the data in the mantissa. |
275 | // We do this via small, intermediate values which once we reach |
276 | // the maximum number of digits we can process without overflow, |
277 | // we add the temporary to the big integer. |
278 | let mut counter: usize = 0; |
279 | let mut count: usize = 0; |
280 | let mut value: Limb = 0; |
281 | let mut result = Bigint::new(); |
282 | |
283 | // Now use our pre-computed small powers iteratively. |
284 | // This is calculated as `⌊log(2^BITS - 1, 10)⌋`. |
285 | let step: usize = if LIMB_BITS == 32 { |
286 | 9 |
287 | } else { |
288 | 19 |
289 | }; |
290 | let max_native = (10 as Limb).pow(step as u32); |
291 | |
292 | // Process the integer digits. |
293 | 'integer: loop { |
294 | // Parse a digit at a time, until we reach step. |
295 | while counter < step && count < max_digits { |
296 | if let Some(&c) = integer.next() { |
297 | add_digit!(c, value, counter, count); |
298 | } else { |
299 | break 'integer; |
300 | } |
301 | } |
302 | |
303 | // Check if we've exhausted our max digits. |
304 | if count == max_digits { |
305 | // Need to check if we're truncated, and round-up accordingly. |
306 | // SAFETY: safe since `counter <= step`. |
307 | add_temporary!(@end format, result, counter, value); |
308 | round_up_nonzero!(format, integer, result, count); |
309 | round_up_nonzero!(format, fraction, result, count); |
310 | return (result, count); |
311 | } else { |
312 | // Add our temporary from the loop. |
313 | // SAFETY: safe since `counter <= step`. |
314 | add_temporary!(@max format, result, counter, value, max_native); |
315 | } |
316 | } |
317 | |
318 | // Skip leading fraction zeros. |
319 | // Required to get an accurate count. |
320 | if count == 0 { |
321 | for &c in &mut fraction { |
322 | if c != b'0' { |
323 | add_digit!(c, value, counter, count); |
324 | break; |
325 | } |
326 | } |
327 | } |
328 | |
329 | // Process the fraction digits. |
330 | 'fraction: loop { |
331 | // Parse a digit at a time, until we reach step. |
332 | while counter < step && count < max_digits { |
333 | if let Some(&c) = fraction.next() { |
334 | add_digit!(c, value, counter, count); |
335 | } else { |
336 | break 'fraction; |
337 | } |
338 | } |
339 | |
340 | // Check if we've exhausted our max digits. |
341 | if count == max_digits { |
342 | // SAFETY: safe since `counter <= step`. |
343 | add_temporary!(@end format, result, counter, value); |
344 | round_up_nonzero!(format, fraction, result, count); |
345 | return (result, count); |
346 | } else { |
347 | // Add our temporary from the loop. |
348 | // SAFETY: safe since `counter <= step`. |
349 | add_temporary!(@max format, result, counter, value, max_native); |
350 | } |
351 | } |
352 | |
353 | // We will always have a remainder, as long as we entered the loop |
354 | // once, or counter % step is 0. |
355 | // SAFETY: safe since `counter <= step`. |
356 | add_temporary!(@end format, result, counter, value); |
357 | |
358 | (result, count) |
359 | } |
360 | |
361 | // SCALING |
362 | // ------- |
363 | |
364 | /// Calculate the scientific exponent from a `Number` value. |
365 | /// Any other attempts would require slowdowns for faster algorithms. |
366 | #[inline ] |
367 | pub fn scientific_exponent(num: &Number) -> i32 { |
368 | // Use power reduction to make this faster. |
369 | let mut mantissa: u64 = num.mantissa; |
370 | let mut exponent: i32 = num.exponent; |
371 | while mantissa >= 10000 { |
372 | mantissa /= 10000; |
373 | exponent += 4; |
374 | } |
375 | while mantissa >= 100 { |
376 | mantissa /= 100; |
377 | exponent += 2; |
378 | } |
379 | while mantissa >= 10 { |
380 | mantissa /= 10; |
381 | exponent += 1; |
382 | } |
383 | exponent as i32 |
384 | } |
385 | |
386 | /// Calculate `b` from a a representation of `b` as a float. |
387 | #[inline ] |
388 | pub fn b<F: Float>(float: F) -> ExtendedFloat { |
389 | ExtendedFloat { |
390 | mant: float.mantissa(), |
391 | exp: float.exponent(), |
392 | } |
393 | } |
394 | |
395 | /// Calculate `b+h` from a a representation of `b` as a float. |
396 | #[inline ] |
397 | pub fn bh<F: Float>(float: F) -> ExtendedFloat { |
398 | let fp: ExtendedFloat = b(float); |
399 | ExtendedFloat { |
400 | mant: (fp.mant << 1) + 1, |
401 | exp: fp.exp - 1, |
402 | } |
403 | } |
404 | |