d2s.rs source code [crates/ryu/src/d2s.rs]

1	// Translated from C to Rust. The original C code can be found at
2	// https://github.com/ulfjack/ryu and carries the following license:
3	//
4	// Copyright 2018 Ulf Adams
5	//
6	// The contents of this file may be used under the terms of the Apache License,
7	// Version 2.0.
8	//
9	// (See accompanying file LICENSE-Apache or copy at
10	// http://www.apache.org/licenses/LICENSE-2.0)
11	//
12	// Alternatively, the contents of this file may be used under the terms of
13	// the Boost Software License, Version 1.0.
14	// (See accompanying file LICENSE-Boost or copy at
15	// https://www.boost.org/LICENSE_1_0.txt)
16	//
17	// Unless required by applicable law or agreed to in writing, this software
18	// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19	// KIND, either express or implied.
20
21	use crate::common::{log10_pow2, log10_pow5, pow5bits};
22	#[cfg(not(feature = "small"))]
23	pub use crate::d2s_full_table::{DOUBLE_POW5_INV_SPLIT, DOUBLE_POW5_SPLIT};
24	use crate::d2s_intrinsics::{
25	div10, div100, div5, mul_shift_all_64, multiple_of_power_of_2, multiple_of_power_of_5,
26	};
27	#[cfg(feature = "small")]
28	pub use crate::d2s_small_table::{compute_inv_pow5, compute_pow5};
29	use core::mem::MaybeUninit;
30
/// Width of the mantissa field; `d2d` places the implicit leading bit at this position.
pub const DOUBLE_MANTISSA_BITS: u32 = 52;
/// Width of the exponent field (not referenced by the code in this file section).
pub const DOUBLE_EXPONENT_BITS: u32 = 11;
/// Exponent bias that `d2d` subtracts from the raw exponent field.
pub const DOUBLE_BIAS: i32 = 1023;
/// Bit count used by `d2d` to derive the shift for the inverse power-of-5 multiplication.
pub const DOUBLE_POW5_INV_BITCOUNT: i32 = 125;
/// Bit count used by `d2d` to derive the shift for the power-of-5 multiplication.
pub const DOUBLE_POW5_BITCOUNT: i32 = 125;
36
/// Returns the number of decimal digits needed to print `v`.
///
/// Precondition: `v < 10^17`, i.e. `v` has at most 17 digits (17 digits are
/// sufficient for round-tripping). The precondition is only checked by a
/// `debug_assert!`; without it, any larger value falls into the first branch
/// and reports 17. Zero reports a length of 1.
#[cfg_attr(feature = "no-panic", inline)]
pub fn decimal_length17(v: u64) -> u32 {
    // This is slightly faster than a loop.
    // The average output length is 16.38 digits, so we check high-to-low.
    // Function precondition: v is not an 18, 19, or 20-digit number.
    // (17 digits are sufficient for round-tripping.)
    debug_assert!(v < 100_000_000_000_000_000);

    if v >= 10_000_000_000_000_000 {
        17
    } else if v >= 1_000_000_000_000_000 {
        16
    } else if v >= 100_000_000_000_000 {
        15
    } else if v >= 10_000_000_000_000 {
        14
    } else if v >= 1_000_000_000_000 {
        13
    } else if v >= 100_000_000_000 {
        12
    } else if v >= 10_000_000_000 {
        11
    } else if v >= 1_000_000_000 {
        10
    } else if v >= 100_000_000 {
        9
    } else if v >= 10_000_000 {
        8
    } else if v >= 1_000_000 {
        7
    } else if v >= 100_000 {
        6
    } else if v >= 10_000 {
        5
    } else if v >= 1_000 {
        4
    } else if v >= 100 {
        3
    } else if v >= 10 {
        2
    } else {
        1
    }
}
81
/// A floating decimal representing `m * 10^e`.
pub struct FloatingDecimal64 {
    /// Decimal mantissa `m`.
    pub mantissa: u64,
    /// Decimal exponent `e`.
    // Decimal exponent's range is -324 to 308
    // inclusive, and can fit in i16 if needed.
    pub exponent: i32,
}
89
90	#[cfg_attr(feature = "no-panic", inline)]
91	pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
92	let (e2, m2) = if ieee_exponent == `0` {
93	(
94	// We subtract 2 so that the bounds computation has 2 additional bits.
95	`1` - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS as i32 - `2`,
96	ieee_mantissa,
97	)
98	} else {
99	(
100	ieee_exponent as i32 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS as i32 - `2`,
101	(`1u64` << DOUBLE_MANTISSA_BITS) \| ieee_mantissa,
102	)
103	};
104	let even = (m2 & `1`) == `0`;
105	let accept_bounds = even;
106
107	// Step 2: Determine the interval of valid decimal representations.
108	let mv = `4` * m2;
109	// Implicit bool -> int conversion. True is 1, false is 0.
110	let mm_shift = (ieee_mantissa != `0` \|\| ieee_exponent <= `1`) as u32;
111	// We would compute mp and mm like this:
112	// uint64_t mp = 4 m2 + 2;*
113	// uint64_t mm = mv - 1 - mm_shift;
114
115	// Step 3: Convert to a decimal power base using 128-bit arithmetic.
116	let mut vr: u64;
117	let mut vp: u64;
118	let mut vm: u64;
119	let mut vp_uninit: MaybeUninit<u64> = MaybeUninit::uninit();
120	let mut vm_uninit: MaybeUninit<u64> = MaybeUninit::uninit();
121	let e10: i32;
122	let mut vm_is_trailing_zeros = `false`;
123	let mut vr_is_trailing_zeros = `false`;
124	if e2 >= `0` {
125	// I tried special-casing q == 0, but there was no effect on performance.
126	// This expression is slightly faster than max(0, log10_pow2(e2) - 1).
127	let q = log10_pow2(e2) - (e2 > `3`) as u32;
128	e10 = q as i32;
129	let k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q as i32) - `1`;
130	let i = -e2 + q as i32 + k;
131	vr = unsafe {
132	mul_shift_all_64(
133	m2,
134	#[cfg(feature = "small")]
135	&compute_inv_pow5(q),
136	#[cfg(not(feature = "small"))]
137	{
138	debug_assert!(q < DOUBLE_POW5_INV_SPLIT.len() as u32);
139	DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize)
140	},
141	i as u32,
142	vp_uninit.as_mut_ptr(),
143	vm_uninit.as_mut_ptr(),
144	mm_shift,
145	)
146	};
147	vp = unsafe { vp_uninit.assume_init() };
148	vm = unsafe { vm_uninit.assume_init() };
149	if q <= `21` {
150	// This should use q <= 22, but I think 21 is also safe. Smaller values
151	// may still be safe, but it's more difficult to reason about them.
152	// Only one of mp, mv, and mm can be a multiple of 5, if any.
153	let mv_mod5 = (mv as u32).wrapping_sub(`5u32`.wrapping_mul(div5(mv) as u32));
154	if mv_mod5 == `0` {
155	vr_is_trailing_zeros = multiple_of_power_of_5(mv, q);
156	} else if accept_bounds {
157	// Same as min(e2 + (~mm & 1), pow5_factor(mm)) >= q
158	// <=> e2 + (~mm & 1) >= q && pow5_factor(mm) >= q
159	// <=> true && pow5_factor(mm) >= q, since e2 >= q.
160	vm_is_trailing_zeros = multiple_of_power_of_5(mv - `1` - mm_shift as u64, q);
161	} else {
162	// Same as min(e2 + 1, pow5_factor(mp)) >= q.
163	vp -= multiple_of_power_of_5(mv + `2`, q) as u64;
164	}
165	}
166	} else {
167	// This expression is slightly faster than max(0, log10_pow5(-e2) - 1).
168	let q = log10_pow5(-e2) - (-e2 > `1`) as u32;
169	e10 = q as i32 + e2;
170	let i = -e2 - q as i32;
171	let k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
172	let j = q as i32 - k;
173	vr = unsafe {
174	mul_shift_all_64(
175	m2,
176	#[cfg(feature = "small")]
177	&compute_pow5(i as u32),
178	#[cfg(not(feature = "small"))]
179	{
180	debug_assert!(i < DOUBLE_POW5_SPLIT.len() as i32);
181	DOUBLE_POW5_SPLIT.get_unchecked(i as usize)
182	},
183	j as u32,
184	vp_uninit.as_mut_ptr(),
185	vm_uninit.as_mut_ptr(),
186	mm_shift,
187	)
188	};
189	vp = unsafe { vp_uninit.assume_init() };
190	vm = unsafe { vm_uninit.assume_init() };
191	if q <= `1` {
192	// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
193	// mv = 4 m2, so it always has at least two trailing 0 bits.*
194	vr_is_trailing_zeros = `true`;
195	if accept_bounds {
196	// mm = mv - 1 - mm_shift, so it has 1 trailing 0 bit iff mm_shift == 1.
197	vm_is_trailing_zeros = mm_shift == `1`;
198	} else {
199	// mp = mv + 2, so it always has at least one trailing 0 bit.
200	vp -= `1`;
201	}
202	} else if q < `63` {
203	// TODO(ulfjack): Use a tighter bound here.
204	// We want to know if the full product has at least q trailing zeros.
205	// We need to compute min(p2(mv), p5(mv) - e2) >= q
206	// <=> p2(mv) >= q && p5(mv) - e2 >= q
207	// <=> p2(mv) >= q (because -e2 >= q)
208	vr_is_trailing_zeros = multiple_of_power_of_2(mv, q);
209	}
210	}
211
212	// Step 4: Find the shortest decimal representation in the interval of valid representations.
213	let mut removed = `0i32`;
214	let mut last_removed_digit = `0u8`;
215	// On average, we remove ~2 digits.
216	let output = if vm_is_trailing_zeros \|\| vr_is_trailing_zeros {
217	// General case, which happens rarely (~0.7%).
218	loop {
219	let vp_div10 = div10(vp);
220	let vm_div10 = div10(vm);
221	if vp_div10 <= vm_div10 {
222	break;
223	}
224	let vm_mod10 = (vm as u32).wrapping_sub(`10u32`.wrapping_mul(vm_div10 as u32));
225	let vr_div10 = div10(vr);
226	let vr_mod10 = (vr as u32).wrapping_sub(`10u32`.wrapping_mul(vr_div10 as u32));
227	vm_is_trailing_zeros &= vm_mod10 == `0`;
228	vr_is_trailing_zeros &= last_removed_digit == `0`;
229	last_removed_digit = vr_mod10 as u8;
230	vr = vr_div10;
231	vp = vp_div10;
232	vm = vm_div10;
233	removed += `1`;
234	}
235	if vm_is_trailing_zeros {
236	loop {
237	let vm_div10 = div10(vm);
238	let vm_mod10 = (vm as u32).wrapping_sub(`10u32`.wrapping_mul(vm_div10 as u32));
239	if vm_mod10 != `0` {
240	break;
241	}
242	let vp_div10 = div10(vp);
243	let vr_div10 = div10(vr);
244	let vr_mod10 = (vr as u32).wrapping_sub(`10u32`.wrapping_mul(vr_div10 as u32));
245	vr_is_trailing_zeros &= last_removed_digit == `0`;
246	last_removed_digit = vr_mod10 as u8;
247	vr = vr_div10;
248	vp = vp_div10;
249	vm = vm_div10;
250	removed += `1`;
251	}
252	}
253	if vr_is_trailing_zeros && last_removed_digit == `5` && vr % `2` == `0` {
254	// Round even if the exact number is .....50..0.
255	last_removed_digit = `4`;
256	}
257	// We need to take vr + 1 if vr is outside bounds or we need to round up.
258	vr + ((vr == vm && (!accept_bounds \|\| !vm_is_trailing_zeros)) \|\| last_removed_digit >= `5`)
259	as u64
260	} else {
261	// Specialized for the common case (~99.3%). Percentages below are relative to this.
262	let mut round_up = `false`;
263	let vp_div100 = div100(vp);
264	let vm_div100 = div100(vm);
265	// Optimization: remove two digits at a time (~86.2%).
266	if vp_div100 > vm_div100 {
267	let vr_div100 = div100(vr);
268	let vr_mod100 = (vr as u32).wrapping_sub(`100u32`.wrapping_mul(vr_div100 as u32));
269	round_up = vr_mod100 >= `50`;
270	vr = vr_div100;
271	vp = vp_div100;
272	vm = vm_div100;
273	removed += `2`;
274	}
275	// Loop iterations below (approximately), without optimization above:
276	// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
277	// Loop iterations below (approximately), with optimization above:
278	// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
279	loop {
280	let vp_div10 = div10(vp);
281	let vm_div10 = div10(vm);
282	if vp_div10 <= vm_div10 {
283	break;
284	}
285	let vr_div10 = div10(vr);
286	let vr_mod10 = (vr as u32).wrapping_sub(`10u32`.wrapping_mul(vr_div10 as u32));
287	round_up = vr_mod10 >= `5`;
288	vr = vr_div10;
289	vp = vp_div10;
290	vm = vm_div10;
291	removed += `1`;
292	}
293	// We need to take vr + 1 if vr is outside bounds or we need to round up.
294	vr + (vr == vm \|\| round_up) as u64
295	};
296	let exp = e10 + removed;
297
298	FloatingDecimal64 {
299	exponent: exp,
300	mantissa: output,
301	}
302	}
303