1//! Helper trait for generic float types.
2
3use core::f64;
4
5use crate::fmt::{Debug, LowerExp};
6use crate::num::FpCategory;
7use crate::ops::{self, Add, Div, Mul, Neg};
8
/// Lossy conversion between two `Copy` types, mirroring an `as` cast.
pub trait CastInto<T>: Copy
where
    T: Copy,
{
    /// Converts `self` into `T`. Like an `as` cast, the conversion may lose information.
    fn cast(self) -> T;
}
13
14/// Collection of traits that allow us to be generic over integer size.
15pub trait Integer:
16 Sized
17 + Clone
18 + Copy
19 + Debug
20 + ops::Shr<u32, Output = Self>
21 + ops::Shl<u32, Output = Self>
22 + ops::BitAnd<Output = Self>
23 + ops::BitOr<Output = Self>
24 + PartialEq
25 + CastInto<i16>
26{
27 const ZERO: Self;
28 const ONE: Self;
29}
30
31macro_rules! int {
32 ($($ty:ty),+) => {
33 $(
34 impl CastInto<i16> for $ty {
35 fn cast(self) -> i16 {
36 self as i16
37 }
38 }
39
40 impl Integer for $ty {
41 const ZERO: Self = 0;
42 const ONE: Self = 1;
43 }
44 )+
45 }
46}
47
48int!(u16, u32, u64);
49
50/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
51///
52/// See the parent module's doc comment for why this is necessary.
53///
54/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
55#[doc(hidden)]
56pub trait RawFloat:
57 Sized
58 + Div<Output = Self>
59 + Neg<Output = Self>
60 + Mul<Output = Self>
61 + Add<Output = Self>
62 + LowerExp
63 + PartialEq
64 + PartialOrd
65 + Default
66 + Clone
67 + Copy
68 + Debug
69{
70 /// The unsigned integer with the same size as the float
71 type Int: Integer + Into<u64>;
72
73 /* general constants */
74
75 const INFINITY: Self;
76 const NEG_INFINITY: Self;
77 const NAN: Self;
78 const NEG_NAN: Self;
79
80 /// Bit width of the float
81 const BITS: u32;
82
83 /// The number of bits in the significand, *including* the hidden bit.
84 const SIG_TOTAL_BITS: u32;
85
86 const EXP_MASK: Self::Int;
87 const SIG_MASK: Self::Int;
88
89 /// The number of bits in the significand, *excluding* the hidden bit.
90 const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1;
91
92 /// Number of bits in the exponent.
93 const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
94
95 /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
96 /// representation.
97 ///
98 /// This shifted fully right, use `EXP_MASK` for the shifted value.
99 const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
100
101 /// Signed version of `EXP_SAT` since we convert a lot.
102 const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
103
104 /// The exponent bias value. This is also the maximum value of the exponent.
105 const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
106
107 /// Minimum exponent value of normal values.
108 const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1);
109
110 /// Round-to-even only happens for negative values of q
111 /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
112 /// the 32-bitcase.
113 ///
114 /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
115 /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
116 /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
117 ///
118 /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
119 /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
120 /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
121 /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
122 /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
123 ///
124 /// Thus we have that we only need to round ties to even when
125 /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
126 /// (in the 32-bit case). In both cases,the power of five(5^|q|)
127 /// fits in a 64-bit word.
128 const MIN_EXPONENT_ROUND_TO_EVEN: i32;
129 const MAX_EXPONENT_ROUND_TO_EVEN: i32;
130
131 /* limits related to Fast pathing */
132
133 /// Largest decimal exponent for a non-infinite value.
134 ///
135 /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
136 /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
137 const LARGEST_POWER_OF_TEN: i32 = {
138 let largest_pow2 = Self::EXP_BIAS + 1;
139 pow2_to_pow10(largest_pow2 as i64) as i32
140 };
141
142 /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143 /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
144 ///
145 /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
146 /// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values
147 /// that are representable by the intermediate storage format. I don't actually know _why_
148 /// the storage format is relevant here.
149 ///
150 /// The values may be calculated using the formula. Unfortunately we cannot calculate them at
151 /// compile time since intermediates exceed the range of an `f64`.
152 const SMALLEST_POWER_OF_TEN: i32;
153
154 /// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋`
155 // assuming FLT_EVAL_METHOD = 0
156 const MAX_EXPONENT_FAST_PATH: i64 = {
157 let log2_5 = f64::consts::LOG2_10 - 1.0;
158 (Self::SIG_TOTAL_BITS as f64 / log2_5) as i64
159 };
160
161 /// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋`
162 const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
163
164 /// Maximum exponent that can be represented for a disguised-fast path case.
165 /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋`
166 const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
167 Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64;
168
169 /// Maximum mantissa for the fast-path (`1 << 53` for f64).
170 const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS;
171
172 /// Converts integer into float through an as cast.
173 /// This is only called in the fast-path algorithm, and therefore
174 /// will not lose precision, since the value will always have
175 /// only if the value is <= Self::MAX_MANTISSA_FAST_PATH.
176 fn from_u64(v: u64) -> Self;
177
178 /// Performs a raw transmutation from an integer.
179 fn from_u64_bits(v: u64) -> Self;
180
181 /// Gets a small power-of-ten for fast-path multiplication.
182 fn pow10_fast_path(exponent: usize) -> Self;
183
184 /// Returns the category that this number falls into.
185 fn classify(self) -> FpCategory;
186
187 /// Transmute to the integer representation
188 fn to_bits(self) -> Self::Int;
189
190 /// Returns the mantissa, exponent and sign as integers.
191 ///
192 /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
193 /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
194 /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
195 ///
196 /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
197 /// with the explicit bit set but otherwise unshifted
198 ///
199 /// `s` is only ever +/-1.
200 fn integer_decode(self) -> (u64, i16, i8) {
201 let bits = self.to_bits();
202 let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
203 let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
204 let mantissa = if exponent == 0 {
205 (bits & Self::SIG_MASK) << 1
206 } else {
207 (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS)
208 };
209 // Exponent bias + mantissa shift
210 exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
211 (mantissa.into(), exponent, sign)
212 }
213}
214
/// Solves for `b` in `10^b = 2^a`, i.e. computes `a / log2(10)`.
///
/// The result is converted back with an `as` cast, so it is truncated toward zero.
const fn pow2_to_pow10(a: i64) -> i64 {
    (a as f64 / f64::consts::LOG2_10) as i64
}
220
#[cfg(target_has_reliable_f16)]
impl RawFloat for f16 {
    type Int = u16;

    const BITS: u32 = 16;
    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
    const EXP_MASK: Self::Int = Self::EXP_MASK;
    const SIG_MASK: Self::Int = Self::MAN_MASK;

    const INFINITY: Self = Self::INFINITY;
    const NEG_INFINITY: Self = Self::NEG_INFINITY;
    const NAN: Self = Self::NAN;
    const NEG_NAN: Self = -Self::NAN;

    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
    const SMALLEST_POWER_OF_TEN: i32 = -27;

    /// Converts through an `as` cast; debug builds check that `v` is within the fast-path
    /// mantissa limit.
    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as Self
    }

    /// Reinterprets the low 16 bits of `v` as an `f16`; any higher bits are discarded.
    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        let low = (v & u64::from(u16::MAX)) as u16;
        Self::from_bits(low)
    }

    /// Looks up `10^exponent`. Only the low three bits of `exponent` are used, and indices
    /// 5 through 7 yield `0.0`.
    fn pow10_fast_path(exponent: usize) -> Self {
        // The table is padded with zeros to a power-of-two length so that the masked index
        // is always in bounds.
        #[allow(clippy::use_self)]
        const POW10: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.0];
        const INDEX_MASK: usize = POW10.len() - 1;
        POW10[exponent & INDEX_MASK]
    }

    fn classify(self) -> FpCategory {
        self.classify()
    }

    fn to_bits(self) -> Self::Int {
        self.to_bits()
    }
}
264
265impl RawFloat for f32 {
266 type Int = u32;
267
268 const INFINITY: Self = f32::INFINITY;
269 const NEG_INFINITY: Self = f32::NEG_INFINITY;
270 const NAN: Self = f32::NAN;
271 const NEG_NAN: Self = -f32::NAN;
272
273 const BITS: u32 = 32;
274 const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
275 const EXP_MASK: Self::Int = Self::EXP_MASK;
276 const SIG_MASK: Self::Int = Self::MAN_MASK;
277
278 const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
279 const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
280 const SMALLEST_POWER_OF_TEN: i32 = -65;
281
282 #[inline]
283 fn from_u64(v: u64) -> Self {
284 debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
285 v as _
286 }
287
288 #[inline]
289 fn from_u64_bits(v: u64) -> Self {
290 f32::from_bits((v & 0xFFFFFFFF) as u32)
291 }
292
293 fn pow10_fast_path(exponent: usize) -> Self {
294 #[allow(clippy::use_self)]
295 const TABLE: [f32; 16] =
296 [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
297 TABLE[exponent & 15]
298 }
299
300 fn to_bits(self) -> Self::Int {
301 self.to_bits()
302 }
303
304 fn classify(self) -> FpCategory {
305 self.classify()
306 }
307}
308
309impl RawFloat for f64 {
310 type Int = u64;
311
312 const INFINITY: Self = Self::INFINITY;
313 const NEG_INFINITY: Self = Self::NEG_INFINITY;
314 const NAN: Self = Self::NAN;
315 const NEG_NAN: Self = -Self::NAN;
316
317 const BITS: u32 = 64;
318 const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
319 const EXP_MASK: Self::Int = Self::EXP_MASK;
320 const SIG_MASK: Self::Int = Self::MAN_MASK;
321
322 const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
323 const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
324 const SMALLEST_POWER_OF_TEN: i32 = -342;
325
326 #[inline]
327 fn from_u64(v: u64) -> Self {
328 debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
329 v as _
330 }
331
332 #[inline]
333 fn from_u64_bits(v: u64) -> Self {
334 f64::from_bits(v)
335 }
336
337 fn pow10_fast_path(exponent: usize) -> Self {
338 const TABLE: [f64; 32] = [
339 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
340 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
341 ];
342 TABLE[exponent & 31]
343 }
344
345 fn to_bits(self) -> Self::Int {
346 self.to_bits()
347 }
348
349 fn classify(self) -> FpCategory {
350 self.classify()
351 }
352}
353