1//! Helper trait for generic float types.
2
3use core::f64;
4
5use crate::fmt::{Debug, LowerExp};
6use crate::num::FpCategory;
7use crate::ops::{self, Add, Div, Mul, Neg};
8
/// Lossy `as` casting between two types.
///
/// Implementations are expected to mirror the primitive `as` operator, so a conversion may
/// truncate or wrap instead of failing.
pub trait CastInto<T: Copy>: Copy {
    /// Converts `self` into `T`, discarding whatever does not fit in the target type.
    fn cast(self) -> T;
}
13
14/// Collection of traits that allow us to be generic over integer size.
15pub trait Integer:
16 Sized
17 + Clone
18 + Copy
19 + Debug
20 + ops::Shr<u32, Output = Self>
21 + ops::Shl<u32, Output = Self>
22 + ops::BitAnd<Output = Self>
23 + ops::BitOr<Output = Self>
24 + PartialEq
25 + CastInto<i16>
26{
27 const ZERO: Self;
28 const ONE: Self;
29}
30
31macro_rules! int {
32 ($($ty:ty),+) => {
33 $(
34 impl CastInto<i16> for $ty {
35 fn cast(self) -> i16 {
36 self as i16
37 }
38 }
39
40 impl Integer for $ty {
41 const ZERO: Self = 0;
42 const ONE: Self = 1;
43 }
44 )+
45 }
46}
47
48int!(u32, u64);
49
50/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
51///
52/// See the parent module's doc comment for why this is necessary.
53///
54/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
55#[doc(hidden)]
56pub trait RawFloat:
57 Sized
58 + Div<Output = Self>
59 + Neg<Output = Self>
60 + Mul<Output = Self>
61 + Add<Output = Self>
62 + LowerExp
63 + PartialEq
64 + PartialOrd
65 + Default
66 + Clone
67 + Copy
68 + Debug
69{
70 /// The unsigned integer with the same size as the float
71 type Int: Integer + Into<u64>;
72
73 /* general constants */
74
75 const INFINITY: Self;
76 const NEG_INFINITY: Self;
77 const NAN: Self;
78 const NEG_NAN: Self;
79
80 /// Bit width of the float
81 const BITS: u32;
82
83 /// The number of bits in the significand, *including* the hidden bit.
84 const SIG_TOTAL_BITS: u32;
85
86 const EXP_MASK: Self::Int;
87 const SIG_MASK: Self::Int;
88
89 /// The number of bits in the significand, *excluding* the hidden bit.
90 const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1;
91
92 /// Number of bits in the exponent.
93 const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
94
95 /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
96 /// representation.
97 ///
98 /// This shifted fully right, use `EXP_MASK` for the shifted value.
99 const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
100
101 /// Signed version of `EXP_SAT` since we convert a lot.
102 const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
103
104 /// The exponent bias value. This is also the maximum value of the exponent.
105 const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
106
107 /// Minimum exponent value of normal values.
108 const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1);
109
110 /// Round-to-even only happens for negative values of q
111 /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
112 /// the 32-bitcase.
113 ///
114 /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
115 /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
116 /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
117 ///
118 /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
119 /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
120 /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
121 /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
122 /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
123 ///
124 /// Thus we have that we only need to round ties to even when
125 /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
126 /// (in the 32-bit case). In both cases,the power of five(5^|q|)
127 /// fits in a 64-bit word.
128 const MIN_EXPONENT_ROUND_TO_EVEN: i32;
129 const MAX_EXPONENT_ROUND_TO_EVEN: i32;
130
131 /* limits related to Fast pathing */
132
133 /// Largest decimal exponent for a non-infinite value.
134 ///
135 /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
136 /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
137 const LARGEST_POWER_OF_TEN: i32 = {
138 let largest_pow2 = Self::EXP_BIAS + 1;
139 pow2_to_pow10(largest_pow2 as i64) as i32
140 };
141
142 /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143 /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
144 ///
145 /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
146 /// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values
147 /// that are representable by the intermediate storage format. I don't actually know _why_
148 /// the storage format is relevant here.
149 ///
150 /// The values may be calculated using the formula. Unfortunately we cannot calculate them at
151 /// compile time since intermediates exceed the range of an `f64`.
152 const SMALLEST_POWER_OF_TEN: i32;
153
154 /// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋`
155 // assuming FLT_EVAL_METHOD = 0
156 const MAX_EXPONENT_FAST_PATH: i64 = {
157 let log2_5 = f64::consts::LOG2_10 - 1.0;
158 (Self::SIG_TOTAL_BITS as f64 / log2_5) as i64
159 };
160
161 /// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋`
162 const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
163
164 /// Maximum exponent that can be represented for a disguised-fast path case.
165 /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋`
166 const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
167 Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64;
168
169 /// Maximum mantissa for the fast-path (`1 << 53` for f64).
170 const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS;
171
172 /// Converts integer into float through an as cast.
173 /// This is only called in the fast-path algorithm, and therefore
174 /// will not lose precision, since the value will always have
175 /// only if the value is <= Self::MAX_MANTISSA_FAST_PATH.
176 fn from_u64(v: u64) -> Self;
177
178 /// Performs a raw transmutation from an integer.
179 fn from_u64_bits(v: u64) -> Self;
180
181 /// Gets a small power-of-ten for fast-path multiplication.
182 fn pow10_fast_path(exponent: usize) -> Self;
183
184 /// Returns the category that this number falls into.
185 fn classify(self) -> FpCategory;
186
187 /// Transmute to the integer representation
188 fn to_bits(self) -> Self::Int;
189
190 /// Returns the mantissa, exponent and sign as integers.
191 ///
192 /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
193 /// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
194 /// minimum subnormal power.
195 fn integer_decode(self) -> (u64, i16, i8) {
196 let bits = self.to_bits();
197 let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
198 let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
199 let mantissa = if exponent == 0 {
200 (bits & Self::SIG_MASK) << 1
201 } else {
202 (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS)
203 };
204 // Exponent bias + mantissa shift
205 exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
206 (mantissa.into(), exponent, sign)
207 }
208}
209
/// Solve for `b` in `10^b = 2^a`.
///
/// The exact solution is `a / log2(10)`; the final cast drops the fractional part, truncating
/// toward zero.
const fn pow2_to_pow10(a: i64) -> i64 {
    let exact: f64 = (a as f64) / f64::consts::LOG2_10;
    exact as i64
}
215
216impl RawFloat for f32 {
217 type Int = u32;
218
219 const INFINITY: Self = f32::INFINITY;
220 const NEG_INFINITY: Self = f32::NEG_INFINITY;
221 const NAN: Self = f32::NAN;
222 const NEG_NAN: Self = -f32::NAN;
223
224 const BITS: u32 = 32;
225 const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
226 const EXP_MASK: Self::Int = Self::EXP_MASK;
227 const SIG_MASK: Self::Int = Self::MAN_MASK;
228
229 const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
230 const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
231 const SMALLEST_POWER_OF_TEN: i32 = -65;
232
233 #[inline]
234 fn from_u64(v: u64) -> Self {
235 debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
236 v as _
237 }
238
239 #[inline]
240 fn from_u64_bits(v: u64) -> Self {
241 f32::from_bits((v & 0xFFFFFFFF) as u32)
242 }
243
244 fn pow10_fast_path(exponent: usize) -> Self {
245 #[allow(clippy::use_self)]
246 const TABLE: [f32; 16] =
247 [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
248 TABLE[exponent & 15]
249 }
250
251 fn to_bits(self) -> Self::Int {
252 self.to_bits()
253 }
254
255 fn classify(self) -> FpCategory {
256 self.classify()
257 }
258}
259
260impl RawFloat for f64 {
261 type Int = u64;
262
263 const INFINITY: Self = Self::INFINITY;
264 const NEG_INFINITY: Self = Self::NEG_INFINITY;
265 const NAN: Self = Self::NAN;
266 const NEG_NAN: Self = -Self::NAN;
267
268 const BITS: u32 = 64;
269 const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
270 const EXP_MASK: Self::Int = Self::EXP_MASK;
271 const SIG_MASK: Self::Int = Self::MAN_MASK;
272
273 const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
274 const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
275 const SMALLEST_POWER_OF_TEN: i32 = -342;
276
277 #[inline]
278 fn from_u64(v: u64) -> Self {
279 debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
280 v as _
281 }
282
283 #[inline]
284 fn from_u64_bits(v: u64) -> Self {
285 f64::from_bits(v)
286 }
287
288 fn pow10_fast_path(exponent: usize) -> Self {
289 const TABLE: [f64; 32] = [
290 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
291 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
292 ];
293 TABLE[exponent & 31]
294 }
295
296 fn to_bits(self) -> Self::Int {
297 self.to_bits()
298 }
299
300 fn classify(self) -> FpCategory {
301 self.classify()
302 }
303}
304