float.rs source code [crates/core/src/num/dec2flt/float.rs]

1	//! Helper trait for generic float types.
2
3	use core::f64;
4
5	use crate::fmt::{Debug, LowerExp};
6	use crate::num::FpCategory;
7	use crate::ops::{self, Add, Div, Mul, Neg};
8
/// Lossy conversion between two types, with the semantics of an `as` cast.
pub trait CastInto<T: Copy>: Copy {
    /// Converts `self` into `T`; like `as`, the conversion is allowed to lose information.
    fn cast(self) -> T;
}
13
14	/// Collection of traits that allow us to be generic over integer size.
15	pub trait Integer:
16	Sized
17	+ Clone
18	+ Copy
19	+ Debug
20	+ ops::Shr<u32, Output = Self>
21	+ ops::Shl<u32, Output = Self>
22	+ ops::BitAnd<Output = Self>
23	+ ops::BitOr<Output = Self>
24	+ PartialEq
25	+ CastInto<i16>
26	{
27	const ZERO: Self;
28	const ONE: Self;
29	}
30
/// Implements `CastInto<i16>` and `Integer` for every listed primitive integer type.
macro_rules! int {
    ($($ty:ty),+) => {
        $(
            impl CastInto<i16> for $ty {
                fn cast(self) -> i16 {
                    // Deliberately lossy: a plain `as` cast that keeps the low 16 bits.
                    self as i16
                }
            }

            impl Integer for $ty {
                const ZERO: Self = 0;
                const ONE: Self = 1;
            }
        )+
    }
}
47
// Generate `CastInto<i16>` and `Integer` impls for the unsigned integer types listed here.
int!(u16, u32, u64);
49
50	/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
51	///
52	/// See the parent module's doc comment for why this is necessary.
53	///
54	/// Should never ever* be implemented for other types or be used outside the `dec2flt` module.*
55	#[doc(hidden)]
56	pub trait RawFloat:
57	Sized
58	+ Div<Output = Self>
59	+ Neg<Output = Self>
60	+ Mul<Output = Self>
61	+ Add<Output = Self>
62	+ LowerExp
63	+ PartialEq
64	+ PartialOrd
65	+ Default
66	+ Clone
67	+ Copy
68	+ Debug
69	{
70	/// The unsigned integer with the same size as the float
71	type Int: Integer + Into<u64>;
72
73	/ general constants /
74
75	const INFINITY: Self;
76	const NEG_INFINITY: Self;
77	const NAN: Self;
78	const NEG_NAN: Self;
79
80	/// Bit width of the float
81	const BITS: u32;
82
83	/// The number of bits in the significand, including* the hidden bit.*
84	const SIG_TOTAL_BITS: u32;
85
86	const EXP_MASK: Self::Int;
87	const SIG_MASK: Self::Int;
88
89	/// The number of bits in the significand, excluding* the hidden bit.*
90	const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - `1`;
91
92	/// Number of bits in the exponent.
93	const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - `1`;
94
95	/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
96	/// representation.
97	///
98	/// This shifted fully right, use `EXP_MASK` for the shifted value.
99	const EXP_SAT: u32 = (`1` << Self::EXP_BITS) - `1`;
100
101	/// Signed version of `EXP_SAT` since we convert a lot.
102	const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
103
104	/// The exponent bias value. This is also the maximum value of the exponent.
105	const EXP_BIAS: u32 = Self::EXP_SAT >> `1`;
106
107	/// Minimum exponent value of normal values.
108	const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - `1`);
109
110	/// Round-to-even only happens for negative values of q
111	/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
112	/// the 32-bit case.
113	///
114	/// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
115	/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
116	/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
117	///
118	/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
119	/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
120	/// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
121	/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
122	/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
123	///
124	/// Thus we have that we only need to round ties to even when
125	/// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
126	/// (in the 32-bit case). In both cases,the power of five(5^\|q\|)
127	/// fits in a 64-bit word.
128	const MIN_EXPONENT_ROUND_TO_EVEN: i32;
129	const MAX_EXPONENT_ROUND_TO_EVEN: i32;
130
131	/ limits related to Fast pathing /
132
133	/// Largest decimal exponent for a non-infinite value.
134	///
135	/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
136	/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
137	const LARGEST_POWER_OF_TEN: i32 = {
138	let largest_pow2 = Self::EXP_BIAS + `1`;
139	pow2_to_pow10(largest_pow2 as i64) as i32
140	};
141
142	/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143	/// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
144	///
145	/// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
146	/// the smallest power of two. The `2^64 - 1)` denominator comes from the number of values
147	/// that are representable by the intermediate storage format. I don't actually know _why_
148	/// the storage format is relevant here.
149	///
150	/// The values may be calculated using the formula. Unfortunately we cannot calculate them at
151	/// compile time since intermediates exceed the range of an `f64`.
152	const SMALLEST_POWER_OF_TEN: i32;
153
154	/// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋`
155	// assuming FLT_EVAL_METHOD = 0
156	const MAX_EXPONENT_FAST_PATH: i64 = {
157	let log2_5 = f64::consts::LOG2_10 - `1.0`;
158	(Self::SIG_TOTAL_BITS as f64 / log2_5) as i64
159	};
160
161	/// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋`
162	const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
163
164	/// Maximum exponent that can be represented for a disguised-fast path case.
165	/// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋`
166	const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
167	Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64;
168
169	/// Maximum mantissa for the fast-path (`1 << 53` for f64).
170	const MAX_MANTISSA_FAST_PATH: u64 = `1` << Self::SIG_TOTAL_BITS;
171
172	/// Converts integer into float through an as cast.
173	/// This is only called in the fast-path algorithm, and therefore
174	/// will not lose precision, since the value will always have
175	/// only if the value is <= Self::MAX_MANTISSA_FAST_PATH.
176	fn from_u64(v: u64) -> Self;
177
178	/// Performs a raw transmutation from an integer.
179	fn from_u64_bits(v: u64) -> Self;
180
181	/// Gets a small power-of-ten for fast-path multiplication.
182	fn pow10_fast_path(exponent: usize) -> Self;
183
184	/// Returns the category that this number falls into.
185	fn classify(self) -> FpCategory;
186
187	/// Transmute to the integer representation
188	fn to_bits(self) -> Self::Int;
189
190	/// Returns the mantissa, exponent and sign as integers.
191	///
192	/// This returns `(m, p, s)` such that `s m * 2^p` represents the original float. For 0, the*
193	/// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
194	/// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
195	///
196	/// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
197	/// with the explicit bit set but otherwise unshifted
198	///
199	/// `s` is only ever +/-1.
200	fn integer_decode(self) -> (u64, i16, i8) {
201	let bits = self.to_bits();
202	let sign: i8 = if bits >> (Self::BITS - `1`) == Self::Int::ZERO { `1` } else { `-1` };
203	let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
204	let mantissa = if exponent == `0` {
205	(bits & Self::SIG_MASK) << `1`
206	} else {
207	(bits & Self::SIG_MASK) \| (Self::Int::ONE << Self::SIG_BITS)
208	};
209	// Exponent bias + mantissa shift
210	exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
211	(mantissa.into(), exponent, sign)
212	}
213	}
214
/// Given `a`, returns the `b` satisfying `10^b = 2^a`, i.e. `a / log2(10)` converted to an
/// integer with an `as` cast.
const fn pow2_to_pow10(a: i64) -> i64 {
    ((a as f64) / f64::consts::LOG2_10) as i64
}
220
/// `RawFloat` implementation for `f16`, backed by `u16`.
#[cfg(target_has_reliable_f16)]
impl RawFloat for f16 {
    type Int = u16;

    const INFINITY: Self = Self::INFINITY;
    const NEG_INFINITY: Self = Self::NEG_INFINITY;
    const NAN: Self = Self::NAN;
    const NEG_NAN: Self = -Self::NAN;

    const BITS: u32 = 16;
    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
    const EXP_MASK: Self::Int = Self::EXP_MASK;
    const SIG_MASK: Self::Int = Self::MAN_MASK;

    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
    const SMALLEST_POWER_OF_TEN: i32 = -27;

    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as _
    }

    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        // Only the low 16 bits carry the representation.
        Self::from_bits((v & 0xFFFF) as u16)
    }

    fn pow10_fast_path(exponent: usize) -> Self {
        // The table is zero-padded to 8 entries so that the masked index below is always
        // in bounds.
        #[allow(clippy::use_self)]
        const TABLE: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.0];
        TABLE[exponent & 7]
    }

    fn to_bits(self) -> Self::Int {
        self.to_bits()
    }

    fn classify(self) -> FpCategory {
        self.classify()
    }
}
264
265	impl RawFloat for f32 {
266	type Int = u32;
267
268	const INFINITY: Self = f32::INFINITY;
269	const NEG_INFINITY: Self = f32::NEG_INFINITY;
270	const NAN: Self = f32::NAN;
271	const NEG_NAN: Self = -f32::NAN;
272
273	const BITS: u32 = `32`;
274	const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
275	const EXP_MASK: Self::Int = Self::EXP_MASK;
276	const SIG_MASK: Self::Int = Self::MAN_MASK;
277
278	const MIN_EXPONENT_ROUND_TO_EVEN: i32 = `-17`;
279	const MAX_EXPONENT_ROUND_TO_EVEN: i32 = `10`;
280	const SMALLEST_POWER_OF_TEN: i32 = `-65`;
281
282	#[inline]
283	fn from_u64(v: u64) -> Self {
284	debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
285	v as _
286	}
287
288	#[inline]
289	fn from_u64_bits(v: u64) -> Self {
290	f32::from_bits((v & `0xFFFFFFFF`) as u32)
291	}
292
293	fn pow10_fast_path(exponent: usize) -> Self {
294	#[allow(clippy::use_self)]
295	const TABLE: [f32; `16`] =
296	[`1e0`, `1e1`, `1e2`, `1e3`, `1e4`, `1e5`, `1e6`, `1e7`, `1e8`, `1e9`, `1e10`, `0.`, `0.`, `0.`, `0.`, `0.`];
297	TABLE[exponent & `15`]
298	}
299
300	fn to_bits(self) -> Self::Int {
301	self.to_bits()
302	}
303
304	fn classify(self) -> FpCategory {
305	self.classify()
306	}
307	}
308
309	impl RawFloat for f64 {
310	type Int = u64;
311
312	const INFINITY: Self = Self::INFINITY;
313	const NEG_INFINITY: Self = Self::NEG_INFINITY;
314	const NAN: Self = Self::NAN;
315	const NEG_NAN: Self = -Self::NAN;
316
317	const BITS: u32 = `64`;
318	const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
319	const EXP_MASK: Self::Int = Self::EXP_MASK;
320	const SIG_MASK: Self::Int = Self::MAN_MASK;
321
322	const MIN_EXPONENT_ROUND_TO_EVEN: i32 = `-4`;
323	const MAX_EXPONENT_ROUND_TO_EVEN: i32 = `23`;
324	const SMALLEST_POWER_OF_TEN: i32 = `-342`;
325
326	#[inline]
327	fn from_u64(v: u64) -> Self {
328	debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
329	v as _
330	}
331
332	#[inline]
333	fn from_u64_bits(v: u64) -> Self {
334	f64::from_bits(v)
335	}
336
337	fn pow10_fast_path(exponent: usize) -> Self {
338	const TABLE: [f64; `32`] = [
339	`1e0`, `1e1`, `1e2`, `1e3`, `1e4`, `1e5`, `1e6`, `1e7`, `1e8`, `1e9`, `1e10`, `1e11`, `1e12`, `1e13`, `1e14`, `1e15`,
340	`1e16`, `1e17`, `1e18`, `1e19`, `1e20`, `1e21`, `1e22`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`,
341	];
342	TABLE[exponent & `31`]
343	}
344
345	fn to_bits(self) -> Self::Int {
346	self.to_bits()
347	}
348
349	fn classify(self) -> FpCategory {
350	self.classify()
351	}
352	}
353