1 | //! Helper trait for generic float types. |
2 | |
3 | use core::f64; |
4 | |
5 | use crate::fmt::{Debug, LowerExp}; |
6 | use crate::num::FpCategory; |
7 | use crate::ops::{self, Add, Div, Mul, Neg}; |
8 | |
/// Lossy conversion between two `Copy` types with the semantics of an `as` cast.
///
/// Implementations are expected to forward to `self as T`, so a value that does not fit in `T`
/// is converted lossily rather than reported as an error.
pub trait CastInto<T>: Copy
where
    T: Copy,
{
    /// Converts `self` into `T`, possibly losing information.
    fn cast(self) -> T;
}
13 | |
14 | /// Collection of traits that allow us to be generic over integer size. |
15 | pub trait Integer: |
16 | Sized |
17 | + Clone |
18 | + Copy |
19 | + Debug |
20 | + ops::Shr<u32, Output = Self> |
21 | + ops::Shl<u32, Output = Self> |
22 | + ops::BitAnd<Output = Self> |
23 | + ops::BitOr<Output = Self> |
24 | + PartialEq |
25 | + CastInto<i16> |
26 | { |
27 | const ZERO: Self; |
28 | const ONE: Self; |
29 | } |
30 | |
31 | macro_rules! int { |
32 | ($($ty:ty),+) => { |
33 | $( |
34 | impl CastInto<i16> for $ty { |
35 | fn cast(self) -> i16 { |
36 | self as i16 |
37 | } |
38 | } |
39 | |
40 | impl Integer for $ty { |
41 | const ZERO: Self = 0; |
42 | const ONE: Self = 1; |
43 | } |
44 | )+ |
45 | } |
46 | } |
47 | |
48 | int!(u16, u32, u64); |
49 | |
50 | /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats. |
51 | /// |
52 | /// See the parent module's doc comment for why this is necessary. |
53 | /// |
54 | /// Should **never ever** be implemented for other types or be used outside the `dec2flt` module. |
55 | #[doc (hidden)] |
56 | pub trait RawFloat: |
57 | Sized |
58 | + Div<Output = Self> |
59 | + Neg<Output = Self> |
60 | + Mul<Output = Self> |
61 | + Add<Output = Self> |
62 | + LowerExp |
63 | + PartialEq |
64 | + PartialOrd |
65 | + Default |
66 | + Clone |
67 | + Copy |
68 | + Debug |
69 | { |
70 | /// The unsigned integer with the same size as the float |
71 | type Int: Integer + Into<u64>; |
72 | |
73 | /* general constants */ |
74 | |
75 | const INFINITY: Self; |
76 | const NEG_INFINITY: Self; |
77 | const NAN: Self; |
78 | const NEG_NAN: Self; |
79 | |
80 | /// Bit width of the float |
81 | const BITS: u32; |
82 | |
83 | /// The number of bits in the significand, *including* the hidden bit. |
84 | const SIG_TOTAL_BITS: u32; |
85 | |
86 | const EXP_MASK: Self::Int; |
87 | const SIG_MASK: Self::Int; |
88 | |
89 | /// The number of bits in the significand, *excluding* the hidden bit. |
90 | const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1; |
91 | |
92 | /// Number of bits in the exponent. |
93 | const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; |
94 | |
95 | /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite |
96 | /// representation. |
97 | /// |
98 | /// This shifted fully right, use `EXP_MASK` for the shifted value. |
99 | const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; |
100 | |
101 | /// Signed version of `EXP_SAT` since we convert a lot. |
102 | const INFINITE_POWER: i32 = Self::EXP_SAT as i32; |
103 | |
104 | /// The exponent bias value. This is also the maximum value of the exponent. |
105 | const EXP_BIAS: u32 = Self::EXP_SAT >> 1; |
106 | |
107 | /// Minimum exponent value of normal values. |
108 | const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1); |
109 | |
110 | /// Round-to-even only happens for negative values of q |
111 | /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in |
112 | /// the 32-bitcase. |
113 | /// |
114 | /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we |
115 | /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have |
116 | /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10. |
117 | /// |
118 | /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64 |
119 | /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case) |
120 | /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64 |
121 | /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11 |
122 | /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase). |
123 | /// |
124 | /// Thus we have that we only need to round ties to even when |
125 | /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10] |
126 | /// (in the 32-bit case). In both cases,the power of five(5^|q|) |
127 | /// fits in a 64-bit word. |
128 | const MIN_EXPONENT_ROUND_TO_EVEN: i32; |
129 | const MAX_EXPONENT_ROUND_TO_EVEN: i32; |
130 | |
131 | /* limits related to Fast pathing */ |
132 | |
133 | /// Largest decimal exponent for a non-infinite value. |
134 | /// |
135 | /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast |
136 | /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity. |
137 | const LARGEST_POWER_OF_TEN: i32 = { |
138 | let largest_pow2 = Self::EXP_BIAS + 1; |
139 | pow2_to_pow10(largest_pow2 as i64) as i32 |
140 | }; |
141 | |
142 | /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything |
143 | /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero. |
144 | /// |
145 | /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is |
146 | /// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values |
147 | /// that are representable by the intermediate storage format. I don't actually know _why_ |
148 | /// the storage format is relevant here. |
149 | /// |
150 | /// The values may be calculated using the formula. Unfortunately we cannot calculate them at |
151 | /// compile time since intermediates exceed the range of an `f64`. |
152 | const SMALLEST_POWER_OF_TEN: i32; |
153 | |
154 | /// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋` |
155 | // assuming FLT_EVAL_METHOD = 0 |
156 | const MAX_EXPONENT_FAST_PATH: i64 = { |
157 | let log2_5 = f64::consts::LOG2_10 - 1.0; |
158 | (Self::SIG_TOTAL_BITS as f64 / log2_5) as i64 |
159 | }; |
160 | |
161 | /// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋` |
162 | const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH; |
163 | |
164 | /// Maximum exponent that can be represented for a disguised-fast path case. |
165 | /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋` |
166 | const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = |
167 | Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64; |
168 | |
169 | /// Maximum mantissa for the fast-path (`1 << 53` for f64). |
170 | const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS; |
171 | |
172 | /// Converts integer into float through an as cast. |
173 | /// This is only called in the fast-path algorithm, and therefore |
174 | /// will not lose precision, since the value will always have |
175 | /// only if the value is <= Self::MAX_MANTISSA_FAST_PATH. |
176 | fn from_u64(v: u64) -> Self; |
177 | |
178 | /// Performs a raw transmutation from an integer. |
179 | fn from_u64_bits(v: u64) -> Self; |
180 | |
181 | /// Gets a small power-of-ten for fast-path multiplication. |
182 | fn pow10_fast_path(exponent: usize) -> Self; |
183 | |
184 | /// Returns the category that this number falls into. |
185 | fn classify(self) -> FpCategory; |
186 | |
187 | /// Transmute to the integer representation |
188 | fn to_bits(self) -> Self::Int; |
189 | |
190 | /// Returns the mantissa, exponent and sign as integers. |
191 | /// |
192 | /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the |
193 | /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For |
194 | /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`. |
195 | /// |
196 | /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned |
197 | /// with the explicit bit set but otherwise unshifted |
198 | /// |
199 | /// `s` is only ever +/-1. |
200 | fn integer_decode(self) -> (u64, i16, i8) { |
201 | let bits = self.to_bits(); |
202 | let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 }; |
203 | let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast(); |
204 | let mantissa = if exponent == 0 { |
205 | (bits & Self::SIG_MASK) << 1 |
206 | } else { |
207 | (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS) |
208 | }; |
209 | // Exponent bias + mantissa shift |
210 | exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16; |
211 | (mantissa.into(), exponent, sign) |
212 | } |
213 | } |
214 | |
/// Solves for `b` in `10^b = 2^a`, i.e. computes `a / log2(10)`.
///
/// The quotient is evaluated in `f64` and then converted back with an `as` cast, so the
/// fractional part is discarded (truncation toward zero).
const fn pow2_to_pow10(a: i64) -> i64 {
    (a as f64 / f64::consts::LOG2_10) as i64
}
220 | |
/// `RawFloat` parameters for the 16-bit float, backed by `u16`.
#[cfg(target_has_reliable_f16)]
impl RawFloat for f16 {
    type Int = u16;

    // The right-hand sides below name the inherent `f16` constants, which take precedence over
    // the trait constants being defined here.
    const INFINITY: Self = Self::INFINITY;
    const NEG_INFINITY: Self = Self::NEG_INFINITY;
    const NAN: Self = Self::NAN;
    const NEG_NAN: Self = -Self::NAN;

    const BITS: u32 = 16;
    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
    const EXP_MASK: Self::Int = Self::EXP_MASK;
    const SIG_MASK: Self::Int = Self::MAN_MASK;

    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
    const SMALLEST_POWER_OF_TEN: i32 = -27;

    /// Converts `v` with an `as` cast; debug builds assert that `v` is within the fast-path
    /// mantissa limit.
    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as Self
    }

    /// Reinterprets the low 16 bits of `v` as an `f16`; any higher bits are discarded.
    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        let low_bits = (v & 0xFFFF) as u16;
        Self::from_bits(low_bits)
    }

    /// Looks up `10^exponent` in a table holding `1e0..=1e4`, padded with zeros to 8 entries.
    ///
    /// The index is masked to the table length, so it can never be out of bounds.
    fn pow10_fast_path(exponent: usize) -> Self {
        const LEN: usize = 8;
        // `Self` cannot be named from a nested item, hence the concrete type and the allow.
        #[allow(clippy::use_self)]
        const POW10: [f16; LEN] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.0];
        POW10[exponent & (LEN - 1)]
    }

    // Both methods forward to the inherent `f16` methods of the same name.
    fn to_bits(self) -> Self::Int {
        self.to_bits()
    }

    fn classify(self) -> FpCategory {
        self.classify()
    }
}
264 | |
265 | impl RawFloat for f32 { |
266 | type Int = u32; |
267 | |
268 | const INFINITY: Self = f32::INFINITY; |
269 | const NEG_INFINITY: Self = f32::NEG_INFINITY; |
270 | const NAN: Self = f32::NAN; |
271 | const NEG_NAN: Self = -f32::NAN; |
272 | |
273 | const BITS: u32 = 32; |
274 | const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS; |
275 | const EXP_MASK: Self::Int = Self::EXP_MASK; |
276 | const SIG_MASK: Self::Int = Self::MAN_MASK; |
277 | |
278 | const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17; |
279 | const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10; |
280 | const SMALLEST_POWER_OF_TEN: i32 = -65; |
281 | |
282 | #[inline ] |
283 | fn from_u64(v: u64) -> Self { |
284 | debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH); |
285 | v as _ |
286 | } |
287 | |
288 | #[inline ] |
289 | fn from_u64_bits(v: u64) -> Self { |
290 | f32::from_bits((v & 0xFFFFFFFF) as u32) |
291 | } |
292 | |
293 | fn pow10_fast_path(exponent: usize) -> Self { |
294 | #[allow (clippy::use_self)] |
295 | const TABLE: [f32; 16] = |
296 | [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.]; |
297 | TABLE[exponent & 15] |
298 | } |
299 | |
300 | fn to_bits(self) -> Self::Int { |
301 | self.to_bits() |
302 | } |
303 | |
304 | fn classify(self) -> FpCategory { |
305 | self.classify() |
306 | } |
307 | } |
308 | |
309 | impl RawFloat for f64 { |
310 | type Int = u64; |
311 | |
312 | const INFINITY: Self = Self::INFINITY; |
313 | const NEG_INFINITY: Self = Self::NEG_INFINITY; |
314 | const NAN: Self = Self::NAN; |
315 | const NEG_NAN: Self = -Self::NAN; |
316 | |
317 | const BITS: u32 = 64; |
318 | const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS; |
319 | const EXP_MASK: Self::Int = Self::EXP_MASK; |
320 | const SIG_MASK: Self::Int = Self::MAN_MASK; |
321 | |
322 | const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4; |
323 | const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23; |
324 | const SMALLEST_POWER_OF_TEN: i32 = -342; |
325 | |
326 | #[inline ] |
327 | fn from_u64(v: u64) -> Self { |
328 | debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH); |
329 | v as _ |
330 | } |
331 | |
332 | #[inline ] |
333 | fn from_u64_bits(v: u64) -> Self { |
334 | f64::from_bits(v) |
335 | } |
336 | |
337 | fn pow10_fast_path(exponent: usize) -> Self { |
338 | const TABLE: [f64; 32] = [ |
339 | 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, |
340 | 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0., |
341 | ]; |
342 | TABLE[exponent & 31] |
343 | } |
344 | |
345 | fn to_bits(self) -> Self::Int { |
346 | self.to_bits() |
347 | } |
348 | |
349 | fn classify(self) -> FpCategory { |
350 | self.classify() |
351 | } |
352 | } |
353 | |