| 1 | //! Rust adaptation of the Grisu3 algorithm described in "Printing Floating-Point Numbers Quickly | 
| 2 | //! and Accurately with Integers"[^1]. It uses about 1KB of precomputed table, and in turn, it's | 
|---|
| 3 | //! very quick for most inputs. | 
|---|
| 4 | //! | 
|---|
| 5 | //! [^1]: Florian Loitsch. 2010. Printing floating-point numbers quickly and | 
|---|
| 6 | //!   accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243. | 
|---|
| 7 |  | 
|---|
| 8 | use crate::mem::MaybeUninit; | 
|---|
| 9 | use crate::num::diy_float::Fp; | 
|---|
| 10 | use crate::num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up}; | 
|---|
| 11 |  | 
|---|
| 12 | // see the comments in `format_shortest_opt` for the rationale. | 
|---|
| 13 | #[ doc(hidden)] | 
|---|
| 14 | pub const ALPHA: i16 = -60; | 
|---|
| 15 | #[ doc(hidden)] | 
|---|
| 16 | pub const GAMMA: i16 = -32; | 
|---|
| 17 |  | 
|---|
| 18 | /* | 
|---|
| 19 | # the following Python code generates this table: | 
|---|
| 20 | for i in xrange(-308, 333, 8): | 
|---|
| 21 | if i >= 0: f = 10**i; e = 0 | 
|---|
| 22 | else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80 | 
|---|
| 23 | l = f.bit_length() | 
|---|
| 24 | f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64 | 
|---|
| 25 | print '    (%#018x, %5d, %4d),' % (f, e, i) | 
|---|
| 26 | */ | 
|---|
| 27 |  | 
|---|
| 28 | #[ doc(hidden)] | 
|---|
| 29 | pub static CACHED_POW10: [(u64, i16, i16); 81] = [ | 
|---|
| 30 | // (f, e, k) | 
|---|
| 31 | (0xe61acf033d1a45df, -1087, -308), | 
|---|
| 32 | (0xab70fe17c79ac6ca, -1060, -300), | 
|---|
| 33 | (0xff77b1fcbebcdc4f, -1034, -292), | 
|---|
| 34 | (0xbe5691ef416bd60c, -1007, -284), | 
|---|
| 35 | (0x8dd01fad907ffc3c, -980, -276), | 
|---|
| 36 | (0xd3515c2831559a83, -954, -268), | 
|---|
| 37 | (0x9d71ac8fada6c9b5, -927, -260), | 
|---|
| 38 | (0xea9c227723ee8bcb, -901, -252), | 
|---|
| 39 | (0xaecc49914078536d, -874, -244), | 
|---|
| 40 | (0x823c12795db6ce57, -847, -236), | 
|---|
| 41 | (0xc21094364dfb5637, -821, -228), | 
|---|
| 42 | (0x9096ea6f3848984f, -794, -220), | 
|---|
| 43 | (0xd77485cb25823ac7, -768, -212), | 
|---|
| 44 | (0xa086cfcd97bf97f4, -741, -204), | 
|---|
| 45 | (0xef340a98172aace5, -715, -196), | 
|---|
| 46 | (0xb23867fb2a35b28e, -688, -188), | 
|---|
| 47 | (0x84c8d4dfd2c63f3b, -661, -180), | 
|---|
| 48 | (0xc5dd44271ad3cdba, -635, -172), | 
|---|
| 49 | (0x936b9fcebb25c996, -608, -164), | 
|---|
| 50 | (0xdbac6c247d62a584, -582, -156), | 
|---|
| 51 | (0xa3ab66580d5fdaf6, -555, -148), | 
|---|
| 52 | (0xf3e2f893dec3f126, -529, -140), | 
|---|
| 53 | (0xb5b5ada8aaff80b8, -502, -132), | 
|---|
| 54 | (0x87625f056c7c4a8b, -475, -124), | 
|---|
| 55 | (0xc9bcff6034c13053, -449, -116), | 
|---|
| 56 | (0x964e858c91ba2655, -422, -108), | 
|---|
| 57 | (0xdff9772470297ebd, -396, -100), | 
|---|
| 58 | (0xa6dfbd9fb8e5b88f, -369, -92), | 
|---|
| 59 | (0xf8a95fcf88747d94, -343, -84), | 
|---|
| 60 | (0xb94470938fa89bcf, -316, -76), | 
|---|
| 61 | (0x8a08f0f8bf0f156b, -289, -68), | 
|---|
| 62 | (0xcdb02555653131b6, -263, -60), | 
|---|
| 63 | (0x993fe2c6d07b7fac, -236, -52), | 
|---|
| 64 | (0xe45c10c42a2b3b06, -210, -44), | 
|---|
| 65 | (0xaa242499697392d3, -183, -36), | 
|---|
| 66 | (0xfd87b5f28300ca0e, -157, -28), | 
|---|
| 67 | (0xbce5086492111aeb, -130, -20), | 
|---|
| 68 | (0x8cbccc096f5088cc, -103, -12), | 
|---|
| 69 | (0xd1b71758e219652c, -77, -4), | 
|---|
| 70 | (0x9c40000000000000, -50, 4), | 
|---|
| 71 | (0xe8d4a51000000000, -24, 12), | 
|---|
| 72 | (0xad78ebc5ac620000, 3, 20), | 
|---|
| 73 | (0x813f3978f8940984, 30, 28), | 
|---|
| 74 | (0xc097ce7bc90715b3, 56, 36), | 
|---|
| 75 | (0x8f7e32ce7bea5c70, 83, 44), | 
|---|
| 76 | (0xd5d238a4abe98068, 109, 52), | 
|---|
| 77 | (0x9f4f2726179a2245, 136, 60), | 
|---|
| 78 | (0xed63a231d4c4fb27, 162, 68), | 
|---|
| 79 | (0xb0de65388cc8ada8, 189, 76), | 
|---|
| 80 | (0x83c7088e1aab65db, 216, 84), | 
|---|
| 81 | (0xc45d1df942711d9a, 242, 92), | 
|---|
| 82 | (0x924d692ca61be758, 269, 100), | 
|---|
| 83 | (0xda01ee641a708dea, 295, 108), | 
|---|
| 84 | (0xa26da3999aef774a, 322, 116), | 
|---|
| 85 | (0xf209787bb47d6b85, 348, 124), | 
|---|
| 86 | (0xb454e4a179dd1877, 375, 132), | 
|---|
| 87 | (0x865b86925b9bc5c2, 402, 140), | 
|---|
| 88 | (0xc83553c5c8965d3d, 428, 148), | 
|---|
| 89 | (0x952ab45cfa97a0b3, 455, 156), | 
|---|
| 90 | (0xde469fbd99a05fe3, 481, 164), | 
|---|
| 91 | (0xa59bc234db398c25, 508, 172), | 
|---|
| 92 | (0xf6c69a72a3989f5c, 534, 180), | 
|---|
| 93 | (0xb7dcbf5354e9bece, 561, 188), | 
|---|
| 94 | (0x88fcf317f22241e2, 588, 196), | 
|---|
| 95 | (0xcc20ce9bd35c78a5, 614, 204), | 
|---|
| 96 | (0x98165af37b2153df, 641, 212), | 
|---|
| 97 | (0xe2a0b5dc971f303a, 667, 220), | 
|---|
| 98 | (0xa8d9d1535ce3b396, 694, 228), | 
|---|
| 99 | (0xfb9b7cd9a4a7443c, 720, 236), | 
|---|
| 100 | (0xbb764c4ca7a44410, 747, 244), | 
|---|
| 101 | (0x8bab8eefb6409c1a, 774, 252), | 
|---|
| 102 | (0xd01fef10a657842c, 800, 260), | 
|---|
| 103 | (0x9b10a4e5e9913129, 827, 268), | 
|---|
| 104 | (0xe7109bfba19c0c9d, 853, 276), | 
|---|
| 105 | (0xac2820d9623bf429, 880, 284), | 
|---|
| 106 | (0x80444b5e7aa7cf85, 907, 292), | 
|---|
| 107 | (0xbf21e44003acdd2d, 933, 300), | 
|---|
| 108 | (0x8e679c2f5e44ff8f, 960, 308), | 
|---|
| 109 | (0xd433179d9c8cb841, 986, 316), | 
|---|
| 110 | (0x9e19db92b4e31ba9, 1013, 324), | 
|---|
| 111 | (0xeb96bf6ebadf77d9, 1039, 332), | 
|---|
| 112 | ]; | 
|---|
| 113 |  | 
|---|
| 114 | #[ doc(hidden)] | 
|---|
| 115 | pub const CACHED_POW10_FIRST_E: i16 = -1087; | 
|---|
| 116 | #[ doc(hidden)] | 
|---|
| 117 | pub const CACHED_POW10_LAST_E: i16 = 1039; | 
|---|
| 118 |  | 
|---|
| 119 | #[ doc(hidden)] | 
|---|
| 120 | pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) { | 
|---|
| 121 | let offset: i32 = CACHED_POW10_FIRST_E as i32; | 
|---|
| 122 | let range: i32 = (CACHED_POW10.len() as i32) - 1; | 
|---|
| 123 | let domain: i32 = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32; | 
|---|
| 124 | let idx: i32 = ((gamma as i32) - offset) * range / domain; | 
|---|
| 125 | let (f: u64, e: i16, k: i16) = CACHED_POW10[idx as usize]; | 
|---|
| 126 | debug_assert!(alpha <= e && e <= gamma); | 
|---|
| 127 | (k, Fp { f, e }) | 
|---|
| 128 | } | 
|---|
| 129 |  | 
|---|
| 130 | /// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`. | 
|---|
| 131 | #[ doc(hidden)] | 
|---|
| 132 | pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) { | 
|---|
| 133 | debug_assert!(x > 0); | 
|---|
| 134 |  | 
|---|
| 135 | const X9: u32 = 10_0000_0000; | 
|---|
| 136 | const X8: u32 = 1_0000_0000; | 
|---|
| 137 | const X7: u32 = 1000_0000; | 
|---|
| 138 | const X6: u32 = 100_0000; | 
|---|
| 139 | const X5: u32 = 10_0000; | 
|---|
| 140 | const X4: u32 = 1_0000; | 
|---|
| 141 | const X3: u32 = 1000; | 
|---|
| 142 | const X2: u32 = 100; | 
|---|
| 143 | const X1: u32 = 10; | 
|---|
| 144 |  | 
|---|
| 145 | if x < X4 { | 
|---|
| 146 | if x < X2 { | 
|---|
| 147 | if x < X1 { (0, 1) } else { (1, X1) } | 
|---|
| 148 | } else { | 
|---|
| 149 | if x < X3 { (2, X2) } else { (3, X3) } | 
|---|
| 150 | } | 
|---|
| 151 | } else { | 
|---|
| 152 | if x < X6 { | 
|---|
| 153 | if x < X5 { (4, X4) } else { (5, X5) } | 
|---|
| 154 | } else if x < X8 { | 
|---|
| 155 | if x < X7 { (6, X6) } else { (7, X7) } | 
|---|
| 156 | } else { | 
|---|
| 157 | if x < X9 { (8, X8) } else { (9, X9) } | 
|---|
| 158 | } | 
|---|
| 159 | } | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | /// The shortest mode implementation for Grisu. | 
|---|
| 163 | /// | 
|---|
| 164 | /// It returns `None` when it would return an inexact representation otherwise. | 
|---|
| 165 | pub fn format_shortest_opt<'a>( | 
|---|
| 166 | d: &Decoded, | 
|---|
| 167 | buf: &'a mut [MaybeUninit<u8>], | 
|---|
| 168 | ) -> Option<(/*digits*/ &'a [u8], /*exp*/ i16)> { | 
|---|
| 169 | assert!(d.mant > 0); | 
|---|
| 170 | assert!(d.minus > 0); | 
|---|
| 171 | assert!(d.plus > 0); | 
|---|
| 172 | assert!(d.mant.checked_add(d.plus).is_some()); | 
|---|
| 173 | assert!(d.mant.checked_sub(d.minus).is_some()); | 
|---|
| 174 | assert!(buf.len() >= MAX_SIG_DIGITS); | 
|---|
| 175 | assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision | 
|---|
| 176 |  | 
|---|
| 177 | // start with the normalized values with the shared exponent | 
|---|
| 178 | let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize(); | 
|---|
| 179 | let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e); | 
|---|
| 180 | let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e); | 
|---|
| 181 |  | 
|---|
| 182 | // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`. | 
|---|
| 183 | // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`; | 
|---|
| 184 | // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`. | 
|---|
| 185 | // | 
|---|
| 186 | // it is obviously desirable to maximize `GAMMA - ALPHA`, | 
|---|
| 187 | // so that we don't need many cached powers of 10, but there are some considerations: | 
|---|
| 188 | // | 
|---|
| 189 | // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division. | 
|---|
| 190 | //    (this is not really avoidable, remainder is required for accuracy estimation.) | 
|---|
| 191 | // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10, | 
|---|
| 192 | //    and it should not overflow. | 
|---|
| 193 | // | 
|---|
| 194 | // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`; | 
|---|
| 195 | // -60 and -32 is the maximal range with this constraint, and V8 also uses them. | 
|---|
| 196 | let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64); | 
|---|
| 197 |  | 
|---|
| 198 | // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1). | 
|---|
| 199 | let plus = plus.mul(cached); | 
|---|
| 200 | let minus = minus.mul(cached); | 
|---|
| 201 | let v = v.mul(cached); | 
|---|
| 202 | debug_assert_eq!(plus.e, minus.e); | 
|---|
| 203 | debug_assert_eq!(plus.e, v.e); | 
|---|
| 204 |  | 
|---|
| 205 | //         +- actual range of minus | 
|---|
| 206 | //   | <---|---------------------- unsafe region --------------------------> | | 
|---|
| 207 | //   |     |                                                                 | | 
|---|
| 208 | //   |  |<--->|  | <--------------- safe region ---------------> |           | | 
|---|
| 209 | //   |  |     |  |                                               |           | | 
|---|
| 210 | //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp| | 
|---|
| 211 | //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->| | 
|---|
| 212 | //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----| | 
|---|
| 213 | //   |   minus   |                 |     v     |                 |   plus    | | 
|---|
| 214 | // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1 | 
|---|
| 215 | // | 
|---|
| 216 | // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp). | 
|---|
| 217 | // as we don't know the error is positive or negative, we use two approximations spaced equally | 
|---|
| 218 | // and have the maximal error of 2 ulps. | 
|---|
| 219 | // | 
|---|
| 220 | // the "unsafe region" is a liberal interval which we initially generate. | 
|---|
| 221 | // the "safe region" is a conservative interval which we only accept. | 
|---|
| 222 | // we start with the correct repr within the unsafe region, and try to find the closest repr | 
|---|
| 223 | // to `v` which is also within the safe region. if we can't, we give up. | 
|---|
| 224 | let plus1 = plus.f + 1; | 
|---|
| 225 | //  let plus0 = plus.f - 1; // only for explanation | 
|---|
| 226 | //  let minus0 = minus.f + 1; // only for explanation | 
|---|
| 227 | let minus1 = minus.f - 1; | 
|---|
| 228 | let e = -plus.e as usize; // shared exponent | 
|---|
| 229 |  | 
|---|
| 230 | // divide `plus1` into integral and fractional parts. | 
|---|
| 231 | // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32` | 
|---|
| 232 | // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement. | 
|---|
| 233 | let plus1int = (plus1 >> e) as u32; | 
|---|
| 234 | let plus1frac = plus1 & ((1 << e) - 1); | 
|---|
| 235 |  | 
|---|
| 236 | // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`). | 
|---|
| 237 | // this is an upper bound of `kappa` below. | 
|---|
| 238 | let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int); | 
|---|
| 239 |  | 
|---|
| 240 | let mut i = 0; | 
|---|
| 241 | let exp = max_kappa as i16 - minusk + 1; | 
|---|
| 242 |  | 
|---|
| 243 | // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`, | 
|---|
| 244 | //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest | 
|---|
| 245 | //              representations (with the minimal number of significant digits) in that range. | 
|---|
| 246 | // | 
|---|
| 247 | // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2. | 
|---|
| 248 | // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead. | 
|---|
| 249 | // (e.g., `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.) | 
|---|
| 250 | // the algorithm relies on the later verification phase to exclude `y`. | 
|---|
| 251 | let delta1 = plus1 - minus1; | 
|---|
| 252 | //  let delta1int = (delta1 >> e) as usize; // only for explanation | 
|---|
| 253 | let delta1frac = delta1 & ((1 << e) - 1); | 
|---|
| 254 |  | 
|---|
| 255 | // render integral parts, while checking for the accuracy at each step. | 
|---|
| 256 | let mut ten_kappa = max_ten_kappa; // 10^kappa | 
|---|
| 257 | let mut remainder = plus1int; // digits yet to be rendered | 
|---|
| 258 | loop { | 
|---|
| 259 | // we always have at least one digit to render, as `plus1 >= 10^kappa` | 
|---|
| 260 | // invariants: | 
|---|
| 261 | // - `delta1int <= remainder < 10^(kappa+1)` | 
|---|
| 262 | // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder` | 
|---|
| 263 | //   (it follows that `remainder = plus1int % 10^(kappa+1)`) | 
|---|
| 264 |  | 
|---|
| 265 | // divide `remainder` by `10^kappa`. both are scaled by `2^-e`. | 
|---|
| 266 | let q = remainder / ten_kappa; | 
|---|
| 267 | let r = remainder % ten_kappa; | 
|---|
| 268 | debug_assert!(q < 10); | 
|---|
| 269 | buf[i] = MaybeUninit::new( b'0'+ q as u8); | 
|---|
| 270 | i += 1; | 
|---|
| 271 |  | 
|---|
| 272 | let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e | 
|---|
| 273 | if plus1rem < delta1 { | 
|---|
| 274 | // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`. | 
|---|
| 275 | let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent | 
|---|
| 276 | return round_and_weed( | 
|---|
| 277 | // SAFETY: we initialized that memory above. | 
|---|
| 278 | unsafe { buf[..i].assume_init_mut() }, | 
|---|
| 279 | exp, | 
|---|
| 280 | plus1rem, | 
|---|
| 281 | delta1, | 
|---|
| 282 | plus1 - v.f, | 
|---|
| 283 | ten_kappa, | 
|---|
| 284 | 1, | 
|---|
| 285 | ); | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|
| 288 | // break the loop when we have rendered all integral digits. | 
|---|
| 289 | // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`. | 
|---|
| 290 | if i > max_kappa as usize { | 
|---|
| 291 | debug_assert_eq!(ten_kappa, 1); | 
|---|
| 292 | break; | 
|---|
| 293 | } | 
|---|
| 294 |  | 
|---|
| 295 | // restore invariants | 
|---|
| 296 | ten_kappa /= 10; | 
|---|
| 297 | remainder = r; | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | // render fractional parts, while checking for the accuracy at each step. | 
|---|
| 301 | // this time we rely on repeated multiplications, as division will lose the precision. | 
|---|
| 302 | let mut remainder = plus1frac; | 
|---|
| 303 | let mut threshold = delta1frac; | 
|---|
| 304 | let mut ulp = 1; | 
|---|
| 305 | loop { | 
|---|
| 306 | // the next digit should be significant as we've tested that before breaking out | 
|---|
| 307 | // invariants, where `m = max_kappa + 1` (# of digits in the integral part): | 
|---|
| 308 | // - `remainder < 2^e` | 
|---|
| 309 | // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder` | 
|---|
| 310 |  | 
|---|
| 311 | remainder *= 10; // won't overflow, `2^e * 10 < 2^64` | 
|---|
| 312 | threshold *= 10; | 
|---|
| 313 | ulp *= 10; | 
|---|
| 314 |  | 
|---|
| 315 | // divide `remainder` by `10^kappa`. | 
|---|
| 316 | // both are scaled by `2^e / 10^kappa`, so the latter is implicit here. | 
|---|
| 317 | let q = remainder >> e; | 
|---|
| 318 | let r = remainder & ((1 << e) - 1); | 
|---|
| 319 | debug_assert!(q < 10); | 
|---|
| 320 | buf[i] = MaybeUninit::new( b'0'+ q as u8); | 
|---|
| 321 | i += 1; | 
|---|
| 322 |  | 
|---|
| 323 | if r < threshold { | 
|---|
| 324 | let ten_kappa = 1 << e; // implicit divisor | 
|---|
| 325 | return round_and_weed( | 
|---|
| 326 | // SAFETY: we initialized that memory above. | 
|---|
| 327 | unsafe { buf[..i].assume_init_mut() }, | 
|---|
| 328 | exp, | 
|---|
| 329 | r, | 
|---|
| 330 | threshold, | 
|---|
| 331 | (plus1 - v.f) * ulp, | 
|---|
| 332 | ten_kappa, | 
|---|
| 333 | ulp, | 
|---|
| 334 | ); | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | // restore invariants | 
|---|
| 338 | remainder = r; | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | // we've generated all significant digits of `plus1`, but not sure if it's the optimal one. | 
|---|
| 342 | // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different | 
|---|
| 343 | // shortest representation from 3.14154 to 3.14158 but we only have the greatest one. | 
|---|
| 344 | // we have to successively decrease the last digit and check if this is the optimal repr. | 
|---|
| 345 | // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase) | 
|---|
| 346 | // | 
|---|
| 347 | // the function checks if this "optimal" repr is actually within the ulp ranges, | 
|---|
| 348 | // and also, it is possible that the "second-to-optimal" repr can actually be optimal | 
|---|
| 349 | // due to the rounding error. in either cases this returns `None`. ("weeding" phase) | 
|---|
| 350 | // | 
|---|
| 351 | // all arguments here are scaled by the common (but implicit) value `k`, so that: | 
|---|
| 352 | // - `remainder = (plus1 % 10^kappa) * k` | 
|---|
| 353 | // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`) | 
|---|
| 354 | // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants) | 
|---|
| 355 | // - `ten_kappa = 10^kappa * k` | 
|---|
| 356 | // - `ulp = 2^-e * k` | 
|---|
| 357 | fn round_and_weed( | 
|---|
| 358 | buf: &mut [u8], | 
|---|
| 359 | exp: i16, | 
|---|
| 360 | remainder: u64, | 
|---|
| 361 | threshold: u64, | 
|---|
| 362 | plus1v: u64, | 
|---|
| 363 | ten_kappa: u64, | 
|---|
| 364 | ulp: u64, | 
|---|
| 365 | ) -> Option<(&[u8], i16)> { | 
|---|
| 366 | assert!(!buf.is_empty()); | 
|---|
| 367 |  | 
|---|
| 368 | // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps. | 
|---|
| 369 | // the resulting representation should be the closest representation to both. | 
|---|
| 370 | // | 
|---|
| 371 | // here `plus1 - v` is used since calculations are done with respect to `plus1` | 
|---|
| 372 | // in order to avoid overflow/underflow (hence the seemingly swapped names). | 
|---|
| 373 | let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp) | 
|---|
| 374 | let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp) | 
|---|
| 375 |  | 
|---|
| 376 | // decrease the last digit and stop at the closest representation to `v + 1 ulp`. | 
|---|
| 377 | let mut plus1w = remainder; // plus1w(n) = plus1 - w(n) | 
|---|
| 378 | { | 
|---|
| 379 | let last = buf.last_mut().unwrap(); | 
|---|
| 380 |  | 
|---|
| 381 | // we work with the approximated digits `w(n)`, which is initially equal to `plus1 - | 
|---|
| 382 | // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 - | 
|---|
| 383 | // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) = | 
|---|
| 384 | // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks. | 
|---|
| 385 | // note that `plus1w(n)` is always increasing. | 
|---|
| 386 | // | 
|---|
| 387 | // we have three conditions to terminate. any of them will make the loop unable to | 
|---|
| 388 | // proceed, but we then have at least one valid representation known to be closest to | 
|---|
| 389 | // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity. | 
|---|
| 390 | // | 
|---|
| 391 | // TC1: `w(n) <= v + 1 ulp`, i.e., this is the last repr that can be the closest one. | 
|---|
| 392 | // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`. | 
|---|
| 393 | // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible | 
|---|
| 394 | // overflow on the calculation of `plus1w(n)`. | 
|---|
| 395 | // | 
|---|
| 396 | // TC2: `w(n+1) < minus1`, i.e., the next repr definitely does not round to `v`. | 
|---|
| 397 | // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa > | 
|---|
| 398 | // plus1 - minus1 = threshold`. the left hand side can overflow, but we know | 
|---|
| 399 | // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) > | 
|---|
| 400 | // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if | 
|---|
| 401 | // `threshold - plus1w(n) < 10^kappa` instead. | 
|---|
| 402 | // | 
|---|
| 403 | // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e., the next repr is | 
|---|
| 404 | // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`, | 
|---|
| 405 | // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have | 
|---|
| 406 | // `z(n) > 0`. we have two cases to consider: | 
|---|
| 407 | // | 
|---|
| 408 | // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing, | 
|---|
| 409 | //   `z(n)` should be decreasing and this is clearly false. | 
|---|
| 410 | // - when `z(n+1) < 0`: | 
|---|
| 411 | //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is | 
|---|
| 412 | //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow. | 
|---|
| 413 | //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e., `plus1v_up - plus1w(n) >= | 
|---|
| 414 | //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1 | 
|---|
| 415 | //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when | 
|---|
| 416 | //     combined with TC3a. | 
|---|
| 417 | // | 
|---|
| 418 | // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is | 
|---|
| 419 | // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`. | 
|---|
| 420 | while plus1w < plus1v_up | 
|---|
| 421 | && threshold - plus1w >= ten_kappa | 
|---|
| 422 | && (plus1w + ten_kappa < plus1v_up | 
|---|
| 423 | || plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up) | 
|---|
| 424 | { | 
|---|
| 425 | *last -= 1; | 
|---|
| 426 | debug_assert!(*last > b'0'); // the shortest repr cannot end with `0` | 
|---|
| 427 | plus1w += ten_kappa; | 
|---|
| 428 | } | 
|---|
| 429 | } | 
|---|
| 430 |  | 
|---|
| 431 | // check if this representation is also the closest representation to `v - 1 ulp`. | 
|---|
| 432 | // | 
|---|
| 433 | // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up` | 
|---|
| 434 | // replaced by `plus1v_down` instead. overflow analysis equally holds. | 
|---|
| 435 | if plus1w < plus1v_down | 
|---|
| 436 | && threshold - plus1w >= ten_kappa | 
|---|
| 437 | && (plus1w + ten_kappa < plus1v_down | 
|---|
| 438 | || plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down) | 
|---|
| 439 | { | 
|---|
| 440 | return None; | 
|---|
| 441 | } | 
|---|
| 442 |  | 
|---|
| 443 | // now we have the closest representation to `v` between `plus1` and `minus1`. | 
|---|
| 444 | // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`, | 
|---|
| 445 | // i.e., `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts | 
|---|
| 446 | // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`. | 
|---|
| 447 | if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp { Some((buf, exp)) } else { None } | 
|---|
| 448 | } | 
|---|
| 449 | } | 
|---|
| 450 |  | 
|---|
| 451 | /// The shortest mode implementation for Grisu with Dragon fallback. | 
|---|
| 452 | /// | 
|---|
| 453 | /// This should be used for most cases. | 
|---|
| 454 | pub fn format_shortest<'a>( | 
|---|
| 455 | d: &Decoded, | 
|---|
| 456 | buf: &'a mut [MaybeUninit<u8>], | 
|---|
| 457 | ) -> (/*digits*/ &'a [u8], /*exp*/ i16) { | 
|---|
| 458 | use crate::num::flt2dec::strategy::dragon::format_shortest as fallback; | 
|---|
| 459 | // SAFETY: The borrow checker is not smart enough to let us use `buf` | 
|---|
| 460 | // in the second branch, so we launder the lifetime here. But we only re-use | 
|---|
| 461 | // `buf` if `format_shortest_opt` returned `None` so this is okay. | 
|---|
| 462 | match format_shortest_opt(d, buf:unsafe { &mut *(buf as *mut _) }) { | 
|---|
| 463 | Some(ret: (&[u8], i16)) => ret, | 
|---|
| 464 | None => fallback(d, buf), | 
|---|
| 465 | } | 
|---|
| 466 | } | 
|---|
| 467 |  | 
|---|
| 468 | /// The exact and fixed mode implementation for Grisu. | 
|---|
| 469 | /// | 
|---|
| 470 | /// It returns `None` when it would return an inexact representation otherwise. | 
|---|
| 471 | pub fn format_exact_opt<'a>( | 
|---|
| 472 | d: &Decoded, | 
|---|
| 473 | buf: &'a mut [MaybeUninit<u8>], | 
|---|
| 474 | limit: i16, | 
|---|
| 475 | ) -> Option<(/*digits*/ &'a [u8], /*exp*/ i16)> { | 
|---|
| 476 | assert!(d.mant > 0); | 
|---|
| 477 | assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision | 
|---|
| 478 | assert!(!buf.is_empty()); | 
|---|
| 479 |  | 
|---|
| 480 | // normalize and scale `v`. | 
|---|
| 481 | let v = Fp { f: d.mant, e: d.exp }.normalize(); | 
|---|
| 482 | let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64); | 
|---|
| 483 | let v = v.mul(cached); | 
|---|
| 484 |  | 
|---|
| 485 | // divide `v` into integral and fractional parts. | 
|---|
| 486 | let e = -v.e as usize; | 
|---|
| 487 | let vint = (v.f >> e) as u32; | 
|---|
| 488 | let vfrac = v.f & ((1 << e) - 1); | 
|---|
| 489 |  | 
|---|
| 490 | let requested_digits = buf.len(); | 
|---|
| 491 |  | 
|---|
| 492 | const POW10_UP_TO_9: [u32; 10] = | 
|---|
| 493 | [1, 10, 100, 1000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 1_000_000_000]; | 
|---|
| 494 |  | 
|---|
| 495 | // We deviate from the original algorithm here and do some early checks to determine if we can satisfy requested_digits. | 
|---|
| 496 | // If we determine that we can't, we exit early and avoid most of the heavy lifting that the algorithm otherwise does. | 
|---|
| 497 | // | 
|---|
| 498 | // When vfrac is zero, we can easily determine if vint can satisfy requested digits: | 
|---|
| 499 | //      If requested_digits >= 11, vint is not able to exhaust the count by itself since 10^(11 -1) > u32 max value >= vint. | 
|---|
| 500 | //      If vint < 10^(requested_digits - 1), vint cannot exhaust the count. | 
|---|
| 501 | //      Otherwise, vint might be able to exhaust the count and we need to execute the rest of the code. | 
|---|
| 502 | if (vfrac == 0) && ((requested_digits >= 11) || (vint < POW10_UP_TO_9[requested_digits - 1])) { | 
|---|
| 503 | return None; | 
|---|
| 504 | } | 
|---|
| 505 |  | 
|---|
| 506 | // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1). | 
|---|
| 507 | // as we don't know the error is positive or negative, we use two approximations | 
|---|
| 508 | // spaced equally and have the maximal error of 2 ulps (same to the shortest case). | 
|---|
| 509 | // | 
|---|
| 510 | // the goal is to find the exactly rounded series of digits that are common to | 
|---|
| 511 | // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident. | 
|---|
| 512 | // if this is not possible, we don't know which one is the correct output for `v`, | 
|---|
| 513 | // so we give up and fall back. | 
|---|
| 514 | // | 
|---|
| 515 | // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`), | 
|---|
| 516 | // and we will scale it whenever `v` gets scaled. | 
|---|
| 517 | let mut err = 1; | 
|---|
| 518 |  | 
|---|
| 519 | // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`). | 
|---|
| 520 | // this is an upper bound of `kappa` below. | 
|---|
| 521 | let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint); | 
|---|
| 522 |  | 
|---|
| 523 | let mut i = 0; | 
|---|
| 524 | let exp = max_kappa as i16 - minusk + 1; | 
|---|
| 525 |  | 
|---|
| 526 | // if we are working with the last-digit limitation, we need to shorten the buffer | 
|---|
| 527 | // before the actual rendering in order to avoid double rounding. | 
|---|
| 528 | // note that we have to enlarge the buffer again when rounding up happens! | 
|---|
| 529 | let len = if exp <= limit { | 
|---|
| 530 | // oops, we cannot even produce *one* digit. | 
|---|
| 531 | // this is possible when, say, we've got something like 9.5 and it's being rounded to 10. | 
|---|
| 532 | // | 
|---|
| 533 | // in principle we can immediately call `possibly_round` with an empty buffer, | 
|---|
| 534 | // but scaling `max_ten_kappa << e` by 10 can result in overflow. | 
|---|
| 535 | // thus we are being sloppy here and widen the error range by a factor of 10. | 
|---|
| 536 | // this will increase the false negative rate, but only very, *very* slightly; | 
|---|
| 537 | // it can only matter noticeably when the mantissa is bigger than 60 bits. | 
|---|
| 538 | // | 
|---|
| 539 | // SAFETY: `len=0`, so the obligation of having initialized this memory is trivial. | 
|---|
| 540 | return unsafe { | 
|---|
| 541 | possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e) | 
|---|
| 542 | }; | 
|---|
| 543 | } else if ((exp as i32 - limit as i32) as usize) < buf.len() { | 
|---|
| 544 | (exp - limit) as usize | 
|---|
| 545 | } else { | 
|---|
| 546 | buf.len() | 
|---|
| 547 | }; | 
|---|
| 548 | debug_assert!(len > 0); | 
|---|
| 549 |  | 
|---|
| 550 | // render integral parts. | 
|---|
| 551 | // the error is entirely fractional, so we don't need to check it in this part. | 
|---|
| 552 | let mut kappa = max_kappa as i16; | 
|---|
| 553 | let mut ten_kappa = max_ten_kappa; // 10^kappa | 
|---|
| 554 | let mut remainder = vint; // digits yet to be rendered | 
|---|
| 555 | loop { | 
|---|
| 556 | // we always have at least one digit to render | 
|---|
| 557 | // invariants: | 
|---|
| 558 | // - `remainder < 10^(kappa+1)` | 
|---|
| 559 | // - `vint = d[0..n-1] * 10^(kappa+1) + remainder` | 
|---|
| 560 | //   (it follows that `remainder = vint % 10^(kappa+1)`) | 
|---|
| 561 |  | 
|---|
| 562 | // divide `remainder` by `10^kappa`. both are scaled by `2^-e`. | 
|---|
| 563 | let q = remainder / ten_kappa; | 
|---|
| 564 | let r = remainder % ten_kappa; | 
|---|
| 565 | debug_assert!(q < 10); | 
|---|
| 566 | buf[i] = MaybeUninit::new( b'0'+ q as u8); | 
|---|
| 567 | i += 1; | 
|---|
| 568 |  | 
|---|
| 569 | // is the buffer full? run the rounding pass with the remainder. | 
|---|
| 570 | if i == len { | 
|---|
| 571 | let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e | 
|---|
| 572 | // SAFETY: we have initialized `len` many bytes. | 
|---|
| 573 | return unsafe { | 
|---|
| 574 | possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e) | 
|---|
| 575 | }; | 
|---|
| 576 | } | 
|---|
| 577 |  | 
|---|
| 578 | // break the loop when we have rendered all integral digits. | 
|---|
| 579 | // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`. | 
|---|
| 580 | if i > max_kappa as usize { | 
|---|
| 581 | debug_assert_eq!(ten_kappa, 1); | 
|---|
| 582 | debug_assert_eq!(kappa, 0); | 
|---|
| 583 | break; | 
|---|
| 584 | } | 
|---|
| 585 |  | 
|---|
| 586 | // restore invariants | 
|---|
| 587 | kappa -= 1; | 
|---|
| 588 | ten_kappa /= 10; | 
|---|
| 589 | remainder = r; | 
|---|
| 590 | } | 
|---|
| 591 |  | 
|---|
| 592 | // render fractional parts. | 
|---|
| 593 | // | 
|---|
| 594 | // in principle we can continue to the last available digit and check for the accuracy. | 
|---|
| 595 | // unfortunately we are working with the finite-sized integers, so we need some criterion | 
|---|
| 596 | // to detect the overflow. V8 uses `remainder > err`, which becomes false when | 
|---|
| 597 | // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects | 
|---|
| 598 | // too many otherwise valid input. | 
|---|
| 599 | // | 
|---|
| 600 | // since the later phase has a correct overflow detection, we instead use tighter criterion: | 
|---|
| 601 | // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and | 
|---|
| 602 | // `v + 1 ulp` definitely contains two or more rounded representations. this is same to | 
|---|
| 603 | // the first two comparisons from `possibly_round`, for the reference. | 
|---|
| 604 | let mut remainder = vfrac; | 
|---|
| 605 | let maxerr = 1 << (e - 1); | 
|---|
| 606 | while err < maxerr { | 
|---|
| 607 | // invariants, where `m = max_kappa + 1` (# of digits in the integral part): | 
|---|
| 608 | // - `remainder < 2^e` | 
|---|
| 609 | // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder` | 
|---|
| 610 | // - `err = 10^(n-m)` | 
|---|
| 611 |  | 
|---|
| 612 | remainder *= 10; // won't overflow, `2^e * 10 < 2^64` | 
|---|
| 613 | err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64` | 
|---|
| 614 |  | 
|---|
| 615 | // divide `remainder` by `10^kappa`. | 
|---|
| 616 | // both are scaled by `2^e / 10^kappa`, so the latter is implicit here. | 
|---|
| 617 | let q = remainder >> e; | 
|---|
| 618 | let r = remainder & ((1 << e) - 1); | 
|---|
| 619 | debug_assert!(q < 10); | 
|---|
| 620 | buf[i] = MaybeUninit::new( b'0'+ q as u8); | 
|---|
| 621 | i += 1; | 
|---|
| 622 |  | 
|---|
| 623 | // is the buffer full? run the rounding pass with the remainder. | 
|---|
| 624 | if i == len { | 
|---|
| 625 | // SAFETY: we have initialized `len` many bytes. | 
|---|
| 626 | return unsafe { possibly_round(buf, len, exp, limit, r, 1 << e, err) }; | 
|---|
| 627 | } | 
|---|
| 628 |  | 
|---|
| 629 | // restore invariants | 
|---|
| 630 | remainder = r; | 
|---|
| 631 | } | 
|---|
| 632 |  | 
|---|
| 633 | // further calculation is useless (`possibly_round` definitely fails), so we give up. | 
|---|
| 634 | return None; | 
|---|
| 635 |  | 
|---|
| 636 | // we've generated all requested digits of `v`, which should be also same to corresponding | 
|---|
| 637 | // digits of `v - 1 ulp`. now we check if there is a unique representation shared by | 
|---|
| 638 | // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or | 
|---|
| 639 | // to the rounded-up version of those digits. if the range contains multiple representations | 
|---|
| 640 | // of the same length, we cannot be sure and should return `None` instead. | 
|---|
| 641 | // | 
|---|
| 642 | // all arguments here are scaled by the common (but implicit) value `k`, so that: | 
|---|
| 643 | // - `remainder = (v % 10^kappa) * k` | 
|---|
| 644 | // - `ten_kappa = 10^kappa * k` | 
|---|
| 645 | // - `ulp = 2^-e * k` | 
|---|
| 646 | // | 
|---|
| 647 | // SAFETY: the first `len` bytes of `buf` must be initialized. | 
|---|
| 648 | unsafe fn possibly_round( | 
|---|
| 649 | buf: &mut [MaybeUninit<u8>], | 
|---|
| 650 | mut len: usize, | 
|---|
| 651 | mut exp: i16, | 
|---|
| 652 | limit: i16, | 
|---|
| 653 | remainder: u64, | 
|---|
| 654 | ten_kappa: u64, | 
|---|
| 655 | ulp: u64, | 
|---|
| 656 | ) -> Option<(&[u8], i16)> { | 
|---|
| 657 | debug_assert!(remainder < ten_kappa); | 
|---|
| 658 |  | 
|---|
| 659 | //           10^kappa | 
|---|
| 660 | //    :   :   :<->:   : | 
|---|
| 661 | //    :   :   :   :   : | 
|---|
| 662 | //    :|1 ulp|1 ulp|  : | 
|---|
| 663 | //    :|<--->|<--->|  : | 
|---|
| 664 | // ----|-----|-----|---- | 
|---|
| 665 | //     |     v     | | 
|---|
| 666 | // v - 1 ulp   v + 1 ulp | 
|---|
| 667 | // | 
|---|
| 668 | // (for the reference, the dotted line indicates the exact value for | 
|---|
| 669 | // possible representations in given number of digits.) | 
|---|
| 670 | // | 
|---|
| 671 | // error is too large that there are at least three possible representations | 
|---|
| 672 | // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct. | 
|---|
| 673 | if ulp >= ten_kappa { | 
|---|
| 674 | return None; | 
|---|
| 675 | } | 
|---|
| 676 |  | 
|---|
| 677 | //    10^kappa | 
|---|
| 678 | //   :<------->: | 
|---|
| 679 | //   :         : | 
|---|
| 680 | //   : |1 ulp|1 ulp| | 
|---|
| 681 | //   : |<--->|<--->| | 
|---|
| 682 | // ----|-----|-----|---- | 
|---|
| 683 | //     |     v     | | 
|---|
| 684 | // v - 1 ulp   v + 1 ulp | 
|---|
| 685 | // | 
|---|
| 686 | // in fact, 1/2 ulp is enough to introduce two possible representations. | 
|---|
| 687 | // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.) | 
|---|
| 688 | // this won't overflow, as `ulp < ten_kappa` from the first check. | 
|---|
| 689 | if ten_kappa - ulp <= ulp { | 
|---|
| 690 | return None; | 
|---|
| 691 | } | 
|---|
| 692 |  | 
|---|
| 693 | //     remainder | 
|---|
| 694 | //       :<->|                           : | 
|---|
| 695 | //       :   |                           : | 
|---|
| 696 | //       :<--------- 10^kappa ---------->: | 
|---|
| 697 | //     | :   |                           : | 
|---|
| 698 | //     |1 ulp|1 ulp|                     : | 
|---|
| 699 | //     |<--->|<--->|                     : | 
|---|
| 700 | // ----|-----|-----|------------------------ | 
|---|
| 701 | //     |     v     | | 
|---|
| 702 | // v - 1 ulp   v + 1 ulp | 
|---|
| 703 | // | 
|---|
| 704 | // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`), | 
|---|
| 705 | // then we can safely return. note that `v - 1 ulp` *can* be less than the current | 
|---|
| 706 | // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough: | 
|---|
| 707 | // the distance between `v - 1 ulp` and the current representation | 
|---|
| 708 | // cannot exceed `10^kappa / 2`. | 
|---|
| 709 | // | 
|---|
| 710 | // the condition equals to `remainder + ulp < 10^kappa / 2`. | 
|---|
| 711 | // since this can easily overflow, first check if `remainder < 10^kappa / 2`. | 
|---|
| 712 | // we've already verified that `ulp < 10^kappa / 2`, so as long as | 
|---|
| 713 | // `10^kappa` did not overflow after all, the second check is fine. | 
|---|
| 714 | if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp { | 
|---|
| 715 | // SAFETY: our caller initialized that memory. | 
|---|
| 716 | return Some((unsafe { buf[..len].assume_init_ref() }, exp)); | 
|---|
| 717 | } | 
|---|
| 718 |  | 
|---|
| 719 | //   :<------- remainder ------>|   : | 
|---|
| 720 | //   :                          |   : | 
|---|
| 721 | //   :<--------- 10^kappa --------->: | 
|---|
| 722 | //   :                    |     |   : | | 
|---|
| 723 | //   :                    |1 ulp|1 ulp| | 
|---|
| 724 | //   :                    |<--->|<--->| | 
|---|
| 725 | // -----------------------|-----|-----|----- | 
|---|
| 726 | //                        |     v     | | 
|---|
| 727 | //                    v - 1 ulp   v + 1 ulp | 
|---|
| 728 | // | 
|---|
| 729 | // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation, | 
|---|
| 730 | // we should round up and return. for the same reason we don't need to check `v + 1 ulp`. | 
|---|
| 731 | // | 
|---|
| 732 | // the condition equals to `remainder - ulp >= 10^kappa / 2`. | 
|---|
| 733 | // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`, | 
|---|
| 734 | // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`, | 
|---|
| 735 | // so the second check does not overflow. | 
|---|
| 736 | if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp { | 
|---|
| 737 | if let Some(c) = | 
|---|
| 738 | // SAFETY: our caller must have initialized that memory. | 
|---|
| 739 | round_up(unsafe { buf[..len].assume_init_mut() }) | 
|---|
| 740 | { | 
|---|
| 741 | // only add an additional digit when we've been requested the fixed precision. | 
|---|
| 742 | // we also need to check that, if the original buffer was empty, | 
|---|
| 743 | // the additional digit can only be added when `exp == limit` (edge case). | 
|---|
| 744 | exp += 1; | 
|---|
| 745 | if exp > limit && len < buf.len() { | 
|---|
| 746 | buf[len] = MaybeUninit::new(c); | 
|---|
| 747 | len += 1; | 
|---|
| 748 | } | 
|---|
| 749 | } | 
|---|
| 750 | // SAFETY: we and our caller initialized that memory. | 
|---|
| 751 | return Some((unsafe { buf[..len].assume_init_ref() }, exp)); | 
|---|
| 752 | } | 
|---|
| 753 |  | 
|---|
| 754 | // otherwise we are doomed (i.e., some values between `v - 1 ulp` and `v + 1 ulp` are | 
|---|
| 755 | // rounding down and others are rounding up) and give up. | 
|---|
| 756 | None | 
|---|
| 757 | } | 
|---|
| 758 | } | 
|---|
| 759 |  | 
|---|
| 760 | /// The exact and fixed mode implementation for Grisu with Dragon fallback. | 
|---|
| 761 | /// | 
|---|
| 762 | /// This should be used for most cases. | 
|---|
| 763 | pub fn format_exact<'a>( | 
|---|
| 764 | d: &Decoded, | 
|---|
| 765 | buf: &'a mut [MaybeUninit<u8>], | 
|---|
| 766 | limit: i16, | 
|---|
| 767 | ) -> (/*digits*/ &'a [u8], /*exp*/ i16) { | 
|---|
| 768 | use crate::num::flt2dec::strategy::dragon::format_exact as fallback; | 
|---|
| 769 | // SAFETY: The borrow checker is not smart enough to let us use `buf` | 
|---|
| 770 | // in the second branch, so we launder the lifetime here. But we only re-use | 
|---|
| 771 | // `buf` if `format_exact_opt` returned `None` so this is okay. | 
|---|
| 772 | match format_exact_opt(d, buf:unsafe { &mut *(buf as *mut _) }, limit) { | 
|---|
| 773 | Some(ret: (&[u8], i16)) => ret, | 
|---|
| 774 | None => fallback(d, buf, limit), | 
|---|
| 775 | } | 
|---|
| 776 | } | 
|---|
| 777 |  | 
|---|