grisu.rs source code [crates/core/src/num/flt2dec/strategy/grisu.rs]

1	//! Rust adaptation of the Grisu3 algorithm described in "Printing Floating-Point Numbers Quickly
2	//! and Accurately with Integers"[^1]. It uses about 1KB of precomputed table, and in turn, it's
3	//! very quick for most inputs.
4	//!
5	//! [^1]: Florian Loitsch. 2010. Printing floating-point numbers quickly and
6	//! accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
7
8	use crate::mem::MaybeUninit;
9	use crate::num::diy_float::Fp;
10	use crate::num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
11
12	// see the comments in `format_shortest_opt` for the rationale.
13	#[doc(hidden)]
14	pub const ALPHA: i16 = `-60`;
15	#[doc(hidden)]
16	pub const GAMMA: i16 = `-32`;
17
18	/*
19	# the following Python code generates this table:
20	for i in xrange(-308, 333, 8):
21	if i >= 0: f = 10i; e = 0
22	else: f = 2(80-4i) // 10*-i; e = 4 i - 80*
23	l = f.bit_length()
24	f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
25	print ' (%#018x, %5d, %4d),' % (f, e, i)
26	*/
27
28	#[doc(hidden)]
29	pub static CACHED_POW10: [(u64, i16, i16); `81`] = [
30	// (f, e, k)
31	(`0xe61acf033d1a45df`, `-1087`, `-308`),
32	(`0xab70fe17c79ac6ca`, `-1060`, `-300`),
33	(`0xff77b1fcbebcdc4f`, `-1034`, `-292`),
34	(`0xbe5691ef416bd60c`, `-1007`, `-284`),
35	(`0x8dd01fad907ffc3c`, `-980`, `-276`),
36	(`0xd3515c2831559a83`, `-954`, `-268`),
37	(`0x9d71ac8fada6c9b5`, `-927`, `-260`),
38	(`0xea9c227723ee8bcb`, `-901`, `-252`),
39	(`0xaecc49914078536d`, `-874`, `-244`),
40	(`0x823c12795db6ce57`, `-847`, `-236`),
41	(`0xc21094364dfb5637`, `-821`, `-228`),
42	(`0x9096ea6f3848984f`, `-794`, `-220`),
43	(`0xd77485cb25823ac7`, `-768`, `-212`),
44	(`0xa086cfcd97bf97f4`, `-741`, `-204`),
45	(`0xef340a98172aace5`, `-715`, `-196`),
46	(`0xb23867fb2a35b28e`, `-688`, `-188`),
47	(`0x84c8d4dfd2c63f3b`, `-661`, `-180`),
48	(`0xc5dd44271ad3cdba`, `-635`, `-172`),
49	(`0x936b9fcebb25c996`, `-608`, `-164`),
50	(`0xdbac6c247d62a584`, `-582`, `-156`),
51	(`0xa3ab66580d5fdaf6`, `-555`, `-148`),
52	(`0xf3e2f893dec3f126`, `-529`, `-140`),
53	(`0xb5b5ada8aaff80b8`, `-502`, `-132`),
54	(`0x87625f056c7c4a8b`, `-475`, `-124`),
55	(`0xc9bcff6034c13053`, `-449`, `-116`),
56	(`0x964e858c91ba2655`, `-422`, `-108`),
57	(`0xdff9772470297ebd`, `-396`, `-100`),
58	(`0xa6dfbd9fb8e5b88f`, `-369`, `-92`),
59	(`0xf8a95fcf88747d94`, `-343`, `-84`),
60	(`0xb94470938fa89bcf`, `-316`, `-76`),
61	(`0x8a08f0f8bf0f156b`, `-289`, `-68`),
62	(`0xcdb02555653131b6`, `-263`, `-60`),
63	(`0x993fe2c6d07b7fac`, `-236`, `-52`),
64	(`0xe45c10c42a2b3b06`, `-210`, `-44`),
65	(`0xaa242499697392d3`, `-183`, `-36`),
66	(`0xfd87b5f28300ca0e`, `-157`, `-28`),
67	(`0xbce5086492111aeb`, `-130`, `-20`),
68	(`0x8cbccc096f5088cc`, `-103`, `-12`),
69	(`0xd1b71758e219652c`, `-77`, `-4`),
70	(`0x9c40000000000000`, `-50`, `4`),
71	(`0xe8d4a51000000000`, `-24`, `12`),
72	(`0xad78ebc5ac620000`, `3`, `20`),
73	(`0x813f3978f8940984`, `30`, `28`),
74	(`0xc097ce7bc90715b3`, `56`, `36`),
75	(`0x8f7e32ce7bea5c70`, `83`, `44`),
76	(`0xd5d238a4abe98068`, `109`, `52`),
77	(`0x9f4f2726179a2245`, `136`, `60`),
78	(`0xed63a231d4c4fb27`, `162`, `68`),
79	(`0xb0de65388cc8ada8`, `189`, `76`),
80	(`0x83c7088e1aab65db`, `216`, `84`),
81	(`0xc45d1df942711d9a`, `242`, `92`),
82	(`0x924d692ca61be758`, `269`, `100`),
83	(`0xda01ee641a708dea`, `295`, `108`),
84	(`0xa26da3999aef774a`, `322`, `116`),
85	(`0xf209787bb47d6b85`, `348`, `124`),
86	(`0xb454e4a179dd1877`, `375`, `132`),
87	(`0x865b86925b9bc5c2`, `402`, `140`),
88	(`0xc83553c5c8965d3d`, `428`, `148`),
89	(`0x952ab45cfa97a0b3`, `455`, `156`),
90	(`0xde469fbd99a05fe3`, `481`, `164`),
91	(`0xa59bc234db398c25`, `508`, `172`),
92	(`0xf6c69a72a3989f5c`, `534`, `180`),
93	(`0xb7dcbf5354e9bece`, `561`, `188`),
94	(`0x88fcf317f22241e2`, `588`, `196`),
95	(`0xcc20ce9bd35c78a5`, `614`, `204`),
96	(`0x98165af37b2153df`, `641`, `212`),
97	(`0xe2a0b5dc971f303a`, `667`, `220`),
98	(`0xa8d9d1535ce3b396`, `694`, `228`),
99	(`0xfb9b7cd9a4a7443c`, `720`, `236`),
100	(`0xbb764c4ca7a44410`, `747`, `244`),
101	(`0x8bab8eefb6409c1a`, `774`, `252`),
102	(`0xd01fef10a657842c`, `800`, `260`),
103	(`0x9b10a4e5e9913129`, `827`, `268`),
104	(`0xe7109bfba19c0c9d`, `853`, `276`),
105	(`0xac2820d9623bf429`, `880`, `284`),
106	(`0x80444b5e7aa7cf85`, `907`, `292`),
107	(`0xbf21e44003acdd2d`, `933`, `300`),
108	(`0x8e679c2f5e44ff8f`, `960`, `308`),
109	(`0xd433179d9c8cb841`, `986`, `316`),
110	(`0x9e19db92b4e31ba9`, `1013`, `324`),
111	(`0xeb96bf6ebadf77d9`, `1039`, `332`),
112	];
113
114	#[doc(hidden)]
115	pub const CACHED_POW10_FIRST_E: i16 = `-1087`;
116	#[doc(hidden)]
117	pub const CACHED_POW10_LAST_E: i16 = `1039`;
118
119	#[doc(hidden)]
120	pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
121	let offset: i32 = CACHED_POW10_FIRST_E as i32;
122	let range: i32 = (CACHED_POW10.len() as i32) - `1`;
123	let domain: i32 = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
124	let idx: i32 = ((gamma as i32) - offset) * range / domain;
125	let (f: u64, e: i16, k: i16) = CACHED_POW10[idx as usize];
126	debug_assert!(alpha <= e && e <= gamma);
127	(k, Fp { f, e })
128	}
129
130	/// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
131	#[doc(hidden)]
132	pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
133	debug_assert!(x > `0`);
134
135	const X9: u32 = `10_0000_0000`;
136	const X8: u32 = `1_0000_0000`;
137	const X7: u32 = `1000_0000`;
138	const X6: u32 = `100_0000`;
139	const X5: u32 = `10_0000`;
140	const X4: u32 = `1_0000`;
141	const X3: u32 = `1000`;
142	const X2: u32 = `100`;
143	const X1: u32 = `10`;
144
145	if x < X4 {
146	if x < X2 {
147	if x < X1 { (`0`, `1`) } else { (`1`, X1) }
148	} else {
149	if x < X3 { (`2`, X2) } else { (`3`, X3) }
150	}
151	} else {
152	if x < X6 {
153	if x < X5 { (`4`, X4) } else { (`5`, X5) }
154	} else if x < X8 {
155	if x < X7 { (`6`, X6) } else { (`7`, X7) }
156	} else {
157	if x < X9 { (`8`, X8) } else { (`9`, X9) }
158	}
159	}
160	}
161
162	/// The shortest mode implementation for Grisu.
163	///
164	/// It returns `None` when it would return an inexact representation otherwise.
165	pub fn format_shortest_opt<'a>(
166	d: &Decoded,
167	buf: &'a mut [MaybeUninit<u8>],
168	) -> Option<(/digits/ &'a [u8], /exp/ i16)> {
169	assert!(d.mant > `0`);
170	assert!(d.minus > `0`);
171	assert!(d.plus > `0`);
172	assert!(d.mant.checked_add(d.plus).is_some());
173	assert!(d.mant.checked_sub(d.minus).is_some());
174	assert!(buf.len() >= MAX_SIG_DIGITS);
175	assert!(d.mant + d.plus < (`1` << `61`)); // we need at least three bits of additional precision
176
177	// start with the normalized values with the shared exponent
178	let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
179	let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
180	let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
181
182	// find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
183	// since `plus` is normalized, this means `2^(62 + ALPHA) <= plus cached < 2^(64 + GAMMA)`;*
184	// given our choices of `ALPHA` and `GAMMA`, this puts `plus cached` into `[4, 2^32)`.*
185	//
186	// it is obviously desirable to maximize `GAMMA - ALPHA`,
187	// so that we don't need many cached powers of 10, but there are some considerations:
188	//
189	// 1. we want to keep `floor(plus cached)` within `u32` since it needs a costly division.*
190	// (this is not really avoidable, remainder is required for accuracy estimation.)
191	// 2. the remainder of `floor(plus cached)` repeatedly gets multiplied by 10,*
192	// and it should not overflow.
193	//
194	// the first gives `64 + GAMMA <= 32`, while the second gives `10 2^-ALPHA <= 2^64`;*
195	// -60 and -32 is the maximal range with this constraint, and V8 also uses them.
196	let (minusk, cached) = cached_power(ALPHA - plus.e - `64`, GAMMA - plus.e - `64`);
197
198	// scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
199	let plus = plus.mul(cached);
200	let minus = minus.mul(cached);
201	let v = v.mul(cached);
202	debug_assert_eq!(plus.e, minus.e);
203	debug_assert_eq!(plus.e, v.e);
204
205	// +- actual range of minus
206	// \| <---\|---------------------- unsafe region --------------------------> \|
207	// \| \| \|
208	// \| \|<--->\| \| <--------------- safe region ---------------> \| \|
209	// \| \| \| \| \| \|
210	// \|1 ulp\|1 ulp\| \|1 ulp\|1 ulp\| \|1 ulp\|1 ulp\|
211	// \|<--->\|<--->\| \|<--->\|<--->\| \|<--->\|<--->\|
212	// \|-----\|-----\|-------...-------\|-----\|-----\|-------...-------\|-----\|-----\|
213	// \| minus \| \| v \| \| plus \|
214	// minus1 minus0 v - 1 ulp v + 1 ulp plus0 plus1
215	//
216	// above `minus`, `v` and `plus` are quantized* approximations (error < 1 ulp).*
217	// as we don't know the error is positive or negative, we use two approximations spaced equally
218	// and have the maximal error of 2 ulps.
219	//
220	// the "unsafe region" is a liberal interval which we initially generate.
221	// the "safe region" is a conservative interval which we only accept.
222	// we start with the correct repr within the unsafe region, and try to find the closest repr
223	// to `v` which is also within the safe region. if we can't, we give up.
224	let plus1 = plus.f + `1`;
225	// let plus0 = plus.f - 1; // only for explanation
226	// let minus0 = minus.f + 1; // only for explanation
227	let minus1 = minus.f - `1`;
228	let e = -plus.e as usize; // shared exponent
229
230	// divide `plus1` into integral and fractional parts.
231	// integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
232	// and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
233	let plus1int = (plus1 >> e) as u32;
234	let plus1frac = plus1 & ((`1` << e) - `1`);
235
236	// calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
237	// this is an upper bound of `kappa` below.
238	let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
239
240	let mut i = `0`;
241	let exp = max_kappa as i16 - minusk + `1`;
242
243	// Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
244	// then `V = floor(y / 10^k) 10^k` is in `[x, y]` and one of the shortest*
245	// representations (with the minimal number of significant digits) in that range.
246	//
247	// find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
248	// Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
249	// (e.g., `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
250	// the algorithm relies on the later verification phase to exclude `y`.
251	let delta1 = plus1 - minus1;
252	// let delta1int = (delta1 >> e) as usize; // only for explanation
253	let delta1frac = delta1 & ((`1` << e) - `1`);
254
255	// render integral parts, while checking for the accuracy at each step.
256	let mut ten_kappa = max_ten_kappa; // 10^kappa
257	let mut remainder = plus1int; // digits yet to be rendered
258	loop {
259	// we always have at least one digit to render, as `plus1 >= 10^kappa`
260	// invariants:
261	// - `delta1int <= remainder < 10^(kappa+1)`
262	// - `plus1int = d[0..n-1] 10^(kappa+1) + remainder`*
263	// (it follows that `remainder = plus1int % 10^(kappa+1)`)
264
265	// divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
266	let q = remainder / ten_kappa;
267	let r = remainder % ten_kappa;
268	debug_assert!(q < `10`);
269	buf[i] = MaybeUninit::new(b'0' + q as u8);
270	i += `1`;
271
272	let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) 2^e*
273	if plus1rem < delta1 {
274	// `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
275	let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
276	return round_and_weed(
277	// SAFETY: we initialized that memory above.
278	unsafe { buf[..i].assume_init_mut() },
279	exp,
280	plus1rem,
281	delta1,
282	plus1 - v.f,
283	ten_kappa,
284	`1`,
285	);
286	}
287
288	// break the loop when we have rendered all integral digits.
289	// the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
290	if i > max_kappa as usize {
291	debug_assert_eq!(ten_kappa, `1`);
292	break;
293	}
294
295	// restore invariants
296	ten_kappa /= `10`;
297	remainder = r;
298	}
299
300	// render fractional parts, while checking for the accuracy at each step.
301	// this time we rely on repeated multiplications, as division will lose the precision.
302	let mut remainder = plus1frac;
303	let mut threshold = delta1frac;
304	let mut ulp = `1`;
305	loop {
306	// the next digit should be significant as we've tested that before breaking out
307	// invariants, where `m = max_kappa + 1` (# of digits in the integral part):
308	// - `remainder < 2^e`
309	// - `plus1frac 10^(n-m) = d[m..n-1] * 2^e + remainder`*
310
311	remainder = `10`; // won't overflow, `2^e * 10 < 2^64`*
312	threshold *= `10`;
313	ulp *= `10`;
314
315	// divide `remainder` by `10^kappa`.
316	// both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
317	let q = remainder >> e;
318	let r = remainder & ((`1` << e) - `1`);
319	debug_assert!(q < `10`);
320	buf[i] = MaybeUninit::new(b'0' + q as u8);
321	i += `1`;
322
323	if r < threshold {
324	let ten_kappa = `1` << e; // implicit divisor
325	return round_and_weed(
326	// SAFETY: we initialized that memory above.
327	unsafe { buf[..i].assume_init_mut() },
328	exp,
329	r,
330	threshold,
331	(plus1 - v.f) * ulp,
332	ten_kappa,
333	ulp,
334	);
335	}
336
337	// restore invariants
338	remainder = r;
339	}
340
341	// we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
342	// for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
343	// shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
344	// we have to successively decrease the last digit and check if this is the optimal repr.
345	// there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
346	//
347	// the function checks if this "optimal" repr is actually within the ulp ranges,
348	// and also, it is possible that the "second-to-optimal" repr can actually be optimal
349	// due to the rounding error. in either cases this returns `None`. ("weeding" phase)
350	//
351	// all arguments here are scaled by the common (but implicit) value `k`, so that:
352	// - `remainder = (plus1 % 10^kappa) k`*
353	// - `threshold = (plus1 - minus1) k` (and also, `remainder < threshold`)*
354	// - `plus1v = (plus1 - v) k` (and also, `threshold > plus1v` from prior invariants)*
355	// - `ten_kappa = 10^kappa k`*
356	// - `ulp = 2^-e k`*
357	fn round_and_weed(
358	buf: &mut [u8],
359	exp: i16,
360	remainder: u64,
361	threshold: u64,
362	plus1v: u64,
363	ten_kappa: u64,
364	ulp: u64,
365	) -> Option<(&[u8], i16)> {
366	assert!(!buf.is_empty());
367
368	// produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
369	// the resulting representation should be the closest representation to both.
370	//
371	// here `plus1 - v` is used since calculations are done with respect to `plus1`
372	// in order to avoid overflow/underflow (hence the seemingly swapped names).
373	let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
374	let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
375
376	// decrease the last digit and stop at the closest representation to `v + 1 ulp`.
377	let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
378	{
379	let last = buf.last_mut().unwrap();
380
381	// we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
382	// plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
383	// plus1 % 10^kappa - n 10^kappa`. we set `plus1w(n) = plus1 - w(n) =*
384	// plus1 % 10^kappa + n 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.*
385	// note that `plus1w(n)` is always increasing.
386	//
387	// we have three conditions to terminate. any of them will make the loop unable to
388	// proceed, but we then have at least one valid representation known to be closest to
389	// `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
390	//
391	// TC1: `w(n) <= v + 1 ulp`, i.e., this is the last repr that can be the closest one.
392	// this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
393	// combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
394	// overflow on the calculation of `plus1w(n)`.
395	//
396	// TC2: `w(n+1) < minus1`, i.e., the next repr definitely does not round to `v`.
397	// this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
398	// plus1 - minus1 = threshold`. the left hand side can overflow, but we know
399	// `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
400	// threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
401	// `threshold - plus1w(n) < 10^kappa` instead.
402	//
403	// TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e., the next repr is
404	// no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
405	// this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
406	// `z(n) > 0`. we have two cases to consider:
407	//
408	// - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
409	// `z(n)` should be decreasing and this is clearly false.
410	// - when `z(n+1) < 0`:
411	// - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
412	// false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
413	// - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e., `plus1v_up - plus1w(n) >=
414	// plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
415	// gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
416	// combined with TC3a.
417	//
418	// consequently, we should stop when `TC1 \|\| TC2 \|\| (TC3a && TC3b)`. the following is
419	// equal to its inverse, `!TC1 && !TC2 && (!TC3a \|\| !TC3b)`.
420	while plus1w < plus1v_up
421	&& threshold - plus1w >= ten_kappa
422	&& (plus1w + ten_kappa < plus1v_up
423	\|\| plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up)
424	{
425	*last -= `1`;
426	debug_assert!(last > b'0'); // the shortest repr cannot end with `0`*
427	plus1w += ten_kappa;
428	}
429	}
430
431	// check if this representation is also the closest representation to `v - 1 ulp`.
432	//
433	// this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
434	// replaced by `plus1v_down` instead. overflow analysis equally holds.
435	if plus1w < plus1v_down
436	&& threshold - plus1w >= ten_kappa
437	&& (plus1w + ten_kappa < plus1v_down
438	\|\| plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down)
439	{
440	return None;
441	}
442
443	// now we have the closest representation to `v` between `plus1` and `minus1`.
444	// this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
445	// i.e., `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
446	// that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
447	if `2` * ulp <= plus1w && plus1w <= threshold - `4` * ulp { Some((buf, exp)) } else { None }
448	}
449	}
450
451	/// The shortest mode implementation for Grisu with Dragon fallback.
452	///
453	/// This should be used for most cases.
454	pub fn format_shortest<'a>(
455	d: &Decoded,
456	buf: &'a mut [MaybeUninit<u8>],
457	) -> (/digits/ &'a [u8], /exp/ i16) {
458	use crate::num::flt2dec::strategy::dragon::format_shortest as fallback;
459	// SAFETY: The borrow checker is not smart enough to let us use `buf`
460	// in the second branch, so we launder the lifetime here. But we only re-use
461	// `buf` if `format_shortest_opt` returned `None` so this is okay.
462	match format_shortest_opt(d, buf:unsafe { &mut (buf as mut _) }) {
463	Some(ret: (&[u8], i16)) => ret,
464	None => fallback(d, buf),
465	}
466	}
467
468	/// The exact and fixed mode implementation for Grisu.
469	///
470	/// It returns `None` when it would return an inexact representation otherwise.
471	pub fn format_exact_opt<'a>(
472	d: &Decoded,
473	buf: &'a mut [MaybeUninit<u8>],
474	limit: i16,
475	) -> Option<(/digits/ &'a [u8], /exp/ i16)> {
476	assert!(d.mant > `0`);
477	assert!(d.mant < (`1` << `61`)); // we need at least three bits of additional precision
478	assert!(!buf.is_empty());
479
480	// normalize and scale `v`.
481	let v = Fp { f: d.mant, e: d.exp }.normalize();
482	let (minusk, cached) = cached_power(ALPHA - v.e - `64`, GAMMA - v.e - `64`);
483	let v = v.mul(cached);
484
485	// divide `v` into integral and fractional parts.
486	let e = -v.e as usize;
487	let vint = (v.f >> e) as u32;
488	let vfrac = v.f & ((`1` << e) - `1`);
489
490	let requested_digits = buf.len();
491
492	const POW10_UP_TO_9: [u32; `10`] =
493	[`1`, `10`, `100`, `1000`, `10_000`, `100_000`, `1_000_000`, `10_000_000`, `100_000_000`, `1_000_000_000`];
494
495	// We deviate from the original algorithm here and do some early checks to determine if we can satisfy requested_digits.
496	// If we determine that we can't, we exit early and avoid most of the heavy lifting that the algorithm otherwise does.
497	//
498	// When vfrac is zero, we can easily determine if vint can satisfy requested digits:
499	// If requested_digits >= 11, vint is not able to exhaust the count by itself since 10^(11 -1) > u32 max value >= vint.
500	// If vint < 10^(requested_digits - 1), vint cannot exhaust the count.
501	// Otherwise, vint might be able to exhaust the count and we need to execute the rest of the code.
502	if (vfrac == `0`) && ((requested_digits >= `11`) \|\| (vint < POW10_UP_TO_9[requested_digits - `1`])) {
503	return None;
504	}
505
506	// both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
507	// as we don't know the error is positive or negative, we use two approximations
508	// spaced equally and have the maximal error of 2 ulps (same to the shortest case).
509	//
510	// the goal is to find the exactly rounded series of digits that are common to
511	// both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
512	// if this is not possible, we don't know which one is the correct output for `v`,
513	// so we give up and fall back.
514	//
515	// `err` is defined as `1 ulp 2^e` here (same to the ulp in `vfrac`),*
516	// and we will scale it whenever `v` gets scaled.
517	let mut err = `1`;
518
519	// calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
520	// this is an upper bound of `kappa` below.
521	let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
522
523	let mut i = `0`;
524	let exp = max_kappa as i16 - minusk + `1`;
525
526	// if we are working with the last-digit limitation, we need to shorten the buffer
527	// before the actual rendering in order to avoid double rounding.
528	// note that we have to enlarge the buffer again when rounding up happens!
529	let len = if exp <= limit {
530	// oops, we cannot even produce one* digit.*
531	// this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
532	//
533	// in principle we can immediately call `possibly_round` with an empty buffer,
534	// but scaling `max_ten_kappa << e` by 10 can result in overflow.
535	// thus we are being sloppy here and widen the error range by a factor of 10.
536	// this will increase the false negative rate, but only very, very* slightly;*
537	// it can only matter noticeably when the mantissa is bigger than 60 bits.
538	//
539	// SAFETY: `len=0`, so the obligation of having initialized this memory is trivial.
540	return unsafe {
541	possibly_round(buf, `0`, exp, limit, v.f / `10`, (max_ten_kappa as u64) << e, err << e)
542	};
543	} else if ((exp as i32 - limit as i32) as usize) < buf.len() {
544	(exp - limit) as usize
545	} else {
546	buf.len()
547	};
548	debug_assert!(len > `0`);
549
550	// render integral parts.
551	// the error is entirely fractional, so we don't need to check it in this part.
552	let mut kappa = max_kappa as i16;
553	let mut ten_kappa = max_ten_kappa; // 10^kappa
554	let mut remainder = vint; // digits yet to be rendered
555	loop {
556	// we always have at least one digit to render
557	// invariants:
558	// - `remainder < 10^(kappa+1)`
559	// - `vint = d[0..n-1] 10^(kappa+1) + remainder`*
560	// (it follows that `remainder = vint % 10^(kappa+1)`)
561
562	// divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
563	let q = remainder / ten_kappa;
564	let r = remainder % ten_kappa;
565	debug_assert!(q < `10`);
566	buf[i] = MaybeUninit::new(b'0' + q as u8);
567	i += `1`;
568
569	// is the buffer full? run the rounding pass with the remainder.
570	if i == len {
571	let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) 2^e*
572	// SAFETY: we have initialized `len` many bytes.
573	return unsafe {
574	possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e)
575	};
576	}
577
578	// break the loop when we have rendered all integral digits.
579	// the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
580	if i > max_kappa as usize {
581	debug_assert_eq!(ten_kappa, `1`);
582	debug_assert_eq!(kappa, `0`);
583	break;
584	}
585
586	// restore invariants
587	kappa -= `1`;
588	ten_kappa /= `10`;
589	remainder = r;
590	}
591
592	// render fractional parts.
593	//
594	// in principle we can continue to the last available digit and check for the accuracy.
595	// unfortunately we are working with the finite-sized integers, so we need some criterion
596	// to detect the overflow. V8 uses `remainder > err`, which becomes false when
597	// the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
598	// too many otherwise valid input.
599	//
600	// since the later phase has a correct overflow detection, we instead use tighter criterion:
601	// we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
602	// `v + 1 ulp` definitely contains two or more rounded representations. this is same to
603	// the first two comparisons from `possibly_round`, for the reference.
604	let mut remainder = vfrac;
605	let maxerr = `1` << (e - `1`);
606	while err < maxerr {
607	// invariants, where `m = max_kappa + 1` (# of digits in the integral part):
608	// - `remainder < 2^e`
609	// - `vfrac 10^(n-m) = d[m..n-1] * 2^e + remainder`*
610	// - `err = 10^(n-m)`
611
612	remainder = `10`; // won't overflow, `2^e * 10 < 2^64`*
613	err = `10`; // won't overflow, `err * 10 < 2^e * 5 < 2^64`*
614
615	// divide `remainder` by `10^kappa`.
616	// both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
617	let q = remainder >> e;
618	let r = remainder & ((`1` << e) - `1`);
619	debug_assert!(q < `10`);
620	buf[i] = MaybeUninit::new(b'0' + q as u8);
621	i += `1`;
622
623	// is the buffer full? run the rounding pass with the remainder.
624	if i == len {
625	// SAFETY: we have initialized `len` many bytes.
626	return unsafe { possibly_round(buf, len, exp, limit, r, `1` << e, err) };
627	}
628
629	// restore invariants
630	remainder = r;
631	}
632
633	// further calculation is useless (`possibly_round` definitely fails), so we give up.
634	return None;
635
636	// we've generated all requested digits of `v`, which should be also same to corresponding
637	// digits of `v - 1 ulp`. now we check if there is a unique representation shared by
638	// both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
639	// to the rounded-up version of those digits. if the range contains multiple representations
640	// of the same length, we cannot be sure and should return `None` instead.
641	//
642	// all arguments here are scaled by the common (but implicit) value `k`, so that:
643	// - `remainder = (v % 10^kappa) k`*
644	// - `ten_kappa = 10^kappa k`*
645	// - `ulp = 2^-e k`*
646	//
647	// SAFETY: the first `len` bytes of `buf` must be initialized.
648	unsafe fn possibly_round(
649	buf: &mut [MaybeUninit<u8>],
650	mut len: usize,
651	mut exp: i16,
652	limit: i16,
653	remainder: u64,
654	ten_kappa: u64,
655	ulp: u64,
656	) -> Option<(&[u8], i16)> {
657	debug_assert!(remainder < ten_kappa);
658
659	// 10^kappa
660	// : : :<->: :
661	// : : : : :
662	// :\|1 ulp\|1 ulp\| :
663	// :\|<--->\|<--->\| :
664	// ----\|-----\|-----\|----
665	// \| v \|
666	// v - 1 ulp v + 1 ulp
667	//
668	// (for the reference, the dotted line indicates the exact value for
669	// possible representations in given number of digits.)
670	//
671	// error is too large that there are at least three possible representations
672	// between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
673	if ulp >= ten_kappa {
674	return None;
675	}
676
677	// 10^kappa
678	// :<------->:
679	// : :
680	// : \|1 ulp\|1 ulp\|
681	// : \|<--->\|<--->\|
682	// ----\|-----\|-----\|----
683	// \| v \|
684	// v - 1 ulp v + 1 ulp
685	//
686	// in fact, 1/2 ulp is enough to introduce two possible representations.
687	// (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
688	// this won't overflow, as `ulp < ten_kappa` from the first check.
689	if ten_kappa - ulp <= ulp {
690	return None;
691	}
692
693	// remainder
694	// :<->\| :
695	// : \| :
696	// :<--------- 10^kappa ---------->:
697	// \| : \| :
698	// \|1 ulp\|1 ulp\| :
699	// \|<--->\|<--->\| :
700	// ----\|-----\|-----\|------------------------
701	// \| v \|
702	// v - 1 ulp v + 1 ulp
703	//
704	// if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
705	// then we can safely return. note that `v - 1 ulp` can* be less than the current*
706	// representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
707	// the distance between `v - 1 ulp` and the current representation
708	// cannot exceed `10^kappa / 2`.
709	//
710	// the condition equals to `remainder + ulp < 10^kappa / 2`.
711	// since this can easily overflow, first check if `remainder < 10^kappa / 2`.
712	// we've already verified that `ulp < 10^kappa / 2`, so as long as
713	// `10^kappa` did not overflow after all, the second check is fine.
714	if ten_kappa - remainder > remainder && ten_kappa - `2` * remainder >= `2` * ulp {
715	// SAFETY: our caller initialized that memory.
716	return Some((unsafe { buf[..len].assume_init_ref() }, exp));
717	}
718
719	// :<------- remainder ------>\| :
720	// : \| :
721	// :<--------- 10^kappa --------->:
722	// : \| \| : \|
723	// : \|1 ulp\|1 ulp\|
724	// : \|<--->\|<--->\|
725	// -----------------------\|-----\|-----\|-----
726	// \| v \|
727	// v - 1 ulp v + 1 ulp
728	//
729	// on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
730	// we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
731	//
732	// the condition equals to `remainder - ulp >= 10^kappa / 2`.
733	// again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
734	// as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
735	// so the second check does not overflow.
736	if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
737	if let Some(c) =
738	// SAFETY: our caller must have initialized that memory.
739	round_up(unsafe { buf[..len].assume_init_mut() })
740	{
741	// only add an additional digit when we've been requested the fixed precision.
742	// we also need to check that, if the original buffer was empty,
743	// the additional digit can only be added when `exp == limit` (edge case).
744	exp += `1`;
745	if exp > limit && len < buf.len() {
746	buf[len] = MaybeUninit::new(c);
747	len += `1`;
748	}
749	}
750	// SAFETY: we and our caller initialized that memory.
751	return Some((unsafe { buf[..len].assume_init_ref() }, exp));
752	}
753
754	// otherwise we are doomed (i.e., some values between `v - 1 ulp` and `v + 1 ulp` are
755	// rounding down and others are rounding up) and give up.
756	None
757	}
758	}
759
760	/// The exact and fixed mode implementation for Grisu with Dragon fallback.
761	///
762	/// This should be used for most cases.
763	pub fn format_exact<'a>(
764	d: &Decoded,
765	buf: &'a mut [MaybeUninit<u8>],
766	limit: i16,
767	) -> (/digits/ &'a [u8], /exp/ i16) {
768	use crate::num::flt2dec::strategy::dragon::format_exact as fallback;
769	// SAFETY: The borrow checker is not smart enough to let us use `buf`
770	// in the second branch, so we launder the lifetime here. But we only re-use
771	// `buf` if `format_exact_opt` returned `None` so this is okay.
772	match format_exact_opt(d, buf:unsafe { &mut (buf as mut _) }, limit) {
773	Some(ret: (&[u8], i16)) => ret,
774	None => fallback(d, buf, limit),
775	}
776	}
777