| 1 | //! Helper trait for generic float types. | 
| 2 |  | 
|---|
| 3 | use core::f64; | 
|---|
| 4 |  | 
|---|
| 5 | use crate::fmt::{Debug, LowerExp}; | 
|---|
| 6 | use crate::num::FpCategory; | 
|---|
| 7 | use crate::ops::{self, Add, Div, Mul, Neg}; | 
|---|
| 8 |  | 
|---|
/// Lossy `as` casting between two types.
///
/// Implementations are expected to behave like a primitive `as` cast, so the conversion may
/// truncate or wrap rather than fail.
pub trait CastInto<T: Copy>: Copy {
    /// Converts `self` into `T`, discarding whatever does not fit in the target type.
    fn cast(self) -> T;
}
|---|
| 13 |  | 
|---|
| 14 | /// Collection of traits that allow us to be generic over integer size. | 
|---|
| 15 | pub trait Integer: | 
|---|
| 16 | Sized | 
|---|
| 17 | + Clone | 
|---|
| 18 | + Copy | 
|---|
| 19 | + Debug | 
|---|
| 20 | + ops::Shr<u32, Output = Self> | 
|---|
| 21 | + ops::Shl<u32, Output = Self> | 
|---|
| 22 | + ops::BitAnd<Output = Self> | 
|---|
| 23 | + ops::BitOr<Output = Self> | 
|---|
| 24 | + PartialEq | 
|---|
| 25 | + CastInto<i16> | 
|---|
| 26 | { | 
|---|
| 27 | const ZERO: Self; | 
|---|
| 28 | const ONE: Self; | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | macro_rules! int { | 
|---|
| 32 | ($($ty:ty),+) => { | 
|---|
| 33 | $( | 
|---|
| 34 | impl CastInto<i16> for $ty { | 
|---|
| 35 | fn cast(self) -> i16 { | 
|---|
| 36 | self as i16 | 
|---|
| 37 | } | 
|---|
| 38 | } | 
|---|
| 39 |  | 
|---|
| 40 | impl Integer for $ty { | 
|---|
| 41 | const ZERO: Self = 0; | 
|---|
| 42 | const ONE: Self = 1; | 
|---|
| 43 | } | 
|---|
| 44 | )+ | 
|---|
| 45 | } | 
|---|
| 46 | } | 
|---|
| 47 |  | 
|---|
| 48 | int!(u16, u32, u64); | 
|---|
| 49 |  | 
|---|
| 50 | /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats. | 
|---|
| 51 | /// | 
|---|
| 52 | /// See the parent module's doc comment for why this is necessary. | 
|---|
| 53 | /// | 
|---|
| 54 | /// Should **never ever** be implemented for other types or be used outside the `dec2flt` module. | 
|---|
| 55 | #[ doc(hidden)] | 
|---|
| 56 | pub trait RawFloat: | 
|---|
| 57 | Sized | 
|---|
| 58 | + Div<Output = Self> | 
|---|
| 59 | + Neg<Output = Self> | 
|---|
| 60 | + Mul<Output = Self> | 
|---|
| 61 | + Add<Output = Self> | 
|---|
| 62 | + LowerExp | 
|---|
| 63 | + PartialEq | 
|---|
| 64 | + PartialOrd | 
|---|
| 65 | + Default | 
|---|
| 66 | + Clone | 
|---|
| 67 | + Copy | 
|---|
| 68 | + Debug | 
|---|
| 69 | { | 
|---|
| 70 | /// The unsigned integer with the same size as the float | 
|---|
| 71 | type Int: Integer + Into<u64>; | 
|---|
| 72 |  | 
|---|
| 73 | /* general constants */ | 
|---|
| 74 |  | 
|---|
| 75 | const INFINITY: Self; | 
|---|
| 76 | const NEG_INFINITY: Self; | 
|---|
| 77 | const NAN: Self; | 
|---|
| 78 | const NEG_NAN: Self; | 
|---|
| 79 |  | 
|---|
| 80 | /// Bit width of the float | 
|---|
| 81 | const BITS: u32; | 
|---|
| 82 |  | 
|---|
| 83 | /// The number of bits in the significand, *including* the hidden bit. | 
|---|
| 84 | const SIG_TOTAL_BITS: u32; | 
|---|
| 85 |  | 
|---|
| 86 | const EXP_MASK: Self::Int; | 
|---|
| 87 | const SIG_MASK: Self::Int; | 
|---|
| 88 |  | 
|---|
| 89 | /// The number of bits in the significand, *excluding* the hidden bit. | 
|---|
| 90 | const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1; | 
|---|
| 91 |  | 
|---|
| 92 | /// Number of bits in the exponent. | 
|---|
| 93 | const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1; | 
|---|
| 94 |  | 
|---|
| 95 | /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite | 
|---|
| 96 | /// representation. | 
|---|
| 97 | /// | 
|---|
| 98 | /// This shifted fully right, use `EXP_MASK` for the shifted value. | 
|---|
| 99 | const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1; | 
|---|
| 100 |  | 
|---|
| 101 | /// Signed version of `EXP_SAT` since we convert a lot. | 
|---|
| 102 | const INFINITE_POWER: i32 = Self::EXP_SAT as i32; | 
|---|
| 103 |  | 
|---|
| 104 | /// The exponent bias value. This is also the maximum value of the exponent. | 
|---|
| 105 | const EXP_BIAS: u32 = Self::EXP_SAT >> 1; | 
|---|
| 106 |  | 
|---|
| 107 | /// Minimum exponent value of normal values. | 
|---|
| 108 | const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1); | 
|---|
| 109 |  | 
|---|
| 110 | /// Round-to-even only happens for negative values of q | 
|---|
| 111 | /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in | 
|---|
| 112 | /// the 32-bitcase. | 
|---|
| 113 | /// | 
|---|
| 114 | /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we | 
|---|
| 115 | /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have | 
|---|
| 116 | /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10. | 
|---|
| 117 | /// | 
|---|
| 118 | /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64 | 
|---|
| 119 | /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case) | 
|---|
| 120 | /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64 | 
|---|
| 121 | /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11 | 
|---|
| 122 | /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase). | 
|---|
| 123 | /// | 
|---|
| 124 | /// Thus we have that we only need to round ties to even when | 
|---|
| 125 | /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10] | 
|---|
| 126 | /// (in the 32-bit case). In both cases,the power of five(5^|q|) | 
|---|
| 127 | /// fits in a 64-bit word. | 
|---|
| 128 | const MIN_EXPONENT_ROUND_TO_EVEN: i32; | 
|---|
| 129 | const MAX_EXPONENT_ROUND_TO_EVEN: i32; | 
|---|
| 130 |  | 
|---|
| 131 | /* limits related to Fast pathing */ | 
|---|
| 132 |  | 
|---|
| 133 | /// Largest decimal exponent for a non-infinite value. | 
|---|
| 134 | /// | 
|---|
| 135 | /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast | 
|---|
| 136 | /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity. | 
|---|
| 137 | const LARGEST_POWER_OF_TEN: i32 = { | 
|---|
| 138 | let largest_pow2 = Self::EXP_BIAS + 1; | 
|---|
| 139 | pow2_to_pow10(largest_pow2 as i64) as i32 | 
|---|
| 140 | }; | 
|---|
| 141 |  | 
|---|
| 142 | /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything | 
|---|
| 143 | /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero. | 
|---|
| 144 | /// | 
|---|
| 145 | /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is | 
|---|
| 146 | /// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values | 
|---|
| 147 | /// that are representable by the intermediate storage format. I don't actually know _why_ | 
|---|
| 148 | /// the storage format is relevant here. | 
|---|
| 149 | /// | 
|---|
| 150 | /// The values may be calculated using the formula. Unfortunately we cannot calculate them at | 
|---|
| 151 | /// compile time since intermediates exceed the range of an `f64`. | 
|---|
| 152 | const SMALLEST_POWER_OF_TEN: i32; | 
|---|
| 153 |  | 
|---|
| 154 | /// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋` | 
|---|
| 155 | // assuming FLT_EVAL_METHOD = 0 | 
|---|
| 156 | const MAX_EXPONENT_FAST_PATH: i64 = { | 
|---|
| 157 | let log2_5 = f64::consts::LOG2_10 - 1.0; | 
|---|
| 158 | (Self::SIG_TOTAL_BITS as f64 / log2_5) as i64 | 
|---|
| 159 | }; | 
|---|
| 160 |  | 
|---|
| 161 | /// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋` | 
|---|
| 162 | const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH; | 
|---|
| 163 |  | 
|---|
| 164 | /// Maximum exponent that can be represented for a disguised-fast path case. | 
|---|
| 165 | /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋` | 
|---|
| 166 | const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = | 
|---|
| 167 | Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64; | 
|---|
| 168 |  | 
|---|
| 169 | /// Maximum mantissa for the fast-path (`1 << 53` for f64). | 
|---|
| 170 | const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS; | 
|---|
| 171 |  | 
|---|
| 172 | /// Converts integer into float through an as cast. | 
|---|
| 173 | /// This is only called in the fast-path algorithm, and therefore | 
|---|
| 174 | /// will not lose precision, since the value will always have | 
|---|
| 175 | /// only if the value is <= Self::MAX_MANTISSA_FAST_PATH. | 
|---|
| 176 | fn from_u64(v: u64) -> Self; | 
|---|
| 177 |  | 
|---|
| 178 | /// Performs a raw transmutation from an integer. | 
|---|
| 179 | fn from_u64_bits(v: u64) -> Self; | 
|---|
| 180 |  | 
|---|
| 181 | /// Gets a small power-of-ten for fast-path multiplication. | 
|---|
| 182 | fn pow10_fast_path(exponent: usize) -> Self; | 
|---|
| 183 |  | 
|---|
| 184 | /// Returns the category that this number falls into. | 
|---|
| 185 | fn classify(self) -> FpCategory; | 
|---|
| 186 |  | 
|---|
| 187 | /// Transmute to the integer representation | 
|---|
| 188 | fn to_bits(self) -> Self::Int; | 
|---|
| 189 |  | 
|---|
| 190 | /// Returns the mantissa, exponent and sign as integers. | 
|---|
| 191 | /// | 
|---|
| 192 | /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the | 
|---|
| 193 | /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For | 
|---|
| 194 | /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`. | 
|---|
| 195 | /// | 
|---|
| 196 | /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned | 
|---|
| 197 | /// with the explicit bit set but otherwise unshifted | 
|---|
| 198 | /// | 
|---|
| 199 | /// `s` is only ever +/-1. | 
|---|
| 200 | fn integer_decode(self) -> (u64, i16, i8) { | 
|---|
| 201 | let bits = self.to_bits(); | 
|---|
| 202 | let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 }; | 
|---|
| 203 | let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast(); | 
|---|
| 204 | let mantissa = if exponent == 0 { | 
|---|
| 205 | (bits & Self::SIG_MASK) << 1 | 
|---|
| 206 | } else { | 
|---|
| 207 | (bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS) | 
|---|
| 208 | }; | 
|---|
| 209 | // Exponent bias + mantissa shift | 
|---|
| 210 | exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16; | 
|---|
| 211 | (mantissa.into(), exponent, sign) | 
|---|
| 212 | } | 
|---|
| 213 | } | 
|---|
| 214 |  | 
|---|
/// Solve for `b` in `10^b = 2^a`.
///
/// The exact solution is `a / log2(10)`; the final `as` cast truncates it toward zero, so the
/// result is the integer part of that quotient.
const fn pow2_to_pow10(a: i64) -> i64 {
    let res: f64 = (a as f64) / f64::consts::LOG2_10;
    res as i64
}
|---|
| 220 |  | 
|---|
/// `RawFloat` implementation for `f16`, compiled only when the target has reliable `f16`
/// support.
#[cfg(target_has_reliable_f16)]
impl RawFloat for f16 {
    type Int = u16;

    const INFINITY: Self = Self::INFINITY;
    const NEG_INFINITY: Self = Self::NEG_INFINITY;
    const NAN: Self = Self::NAN;
    const NEG_NAN: Self = -Self::NAN;

    const BITS: u32 = 16;
    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
    const EXP_MASK: Self::Int = Self::EXP_MASK;
    const SIG_MASK: Self::Int = Self::MAN_MASK;

    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
    const SMALLEST_POWER_OF_TEN: i32 = -27;

    /// Converts `v` with an `as` cast; debug builds assert that `v` is within the fast-path
    /// mantissa limit.
    #[inline]
    fn from_u64(v: u64) -> Self {
        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
        v as _
    }

    /// Reinterprets the low 16 bits of `v` as an `f16`; the upper bits are discarded.
    #[inline]
    fn from_u64_bits(v: u64) -> Self {
        Self::from_bits((v & 0xFFFF) as u16)
    }

    /// Looks up `10^exponent` in a small table.
    ///
    /// The index is masked with `& 7`, so the lookup never goes out of bounds; the slots past
    /// `1e4` hold `0.0`.
    fn pow10_fast_path(exponent: usize) -> Self {
        // `Self` cannot be named inside a nested `const` item, so the concrete type is spelled out.
        #[allow(clippy::use_self)]
        const TABLE: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.];
        TABLE[exponent & 7]
    }

    fn to_bits(self) -> Self::Int {
        // Resolves to the inherent `f16::to_bits`, which takes precedence over this trait method.
        self.to_bits()
    }

    fn classify(self) -> FpCategory {
        // Resolves to the inherent `f16::classify`.
        self.classify()
    }
}
|---|
| 264 |  | 
|---|
| 265 | impl RawFloat for f32 { | 
|---|
| 266 | type Int = u32; | 
|---|
| 267 |  | 
|---|
| 268 | const INFINITY: Self = f32::INFINITY; | 
|---|
| 269 | const NEG_INFINITY: Self = f32::NEG_INFINITY; | 
|---|
| 270 | const NAN: Self = f32::NAN; | 
|---|
| 271 | const NEG_NAN: Self = -f32::NAN; | 
|---|
| 272 |  | 
|---|
| 273 | const BITS: u32 = 32; | 
|---|
| 274 | const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS; | 
|---|
| 275 | const EXP_MASK: Self::Int = Self::EXP_MASK; | 
|---|
| 276 | const SIG_MASK: Self::Int = Self::MAN_MASK; | 
|---|
| 277 |  | 
|---|
| 278 | const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17; | 
|---|
| 279 | const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10; | 
|---|
| 280 | const SMALLEST_POWER_OF_TEN: i32 = -65; | 
|---|
| 281 |  | 
|---|
| 282 | #[ inline] | 
|---|
| 283 | fn from_u64(v: u64) -> Self { | 
|---|
| 284 | debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH); | 
|---|
| 285 | v as _ | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|
| 288 | #[ inline] | 
|---|
| 289 | fn from_u64_bits(v: u64) -> Self { | 
|---|
| 290 | f32::from_bits((v & 0xFFFFFFFF) as u32) | 
|---|
| 291 | } | 
|---|
| 292 |  | 
|---|
| 293 | fn pow10_fast_path(exponent: usize) -> Self { | 
|---|
| 294 | #[ allow(clippy::use_self)] | 
|---|
| 295 | const TABLE: [f32; 16] = | 
|---|
| 296 | [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.]; | 
|---|
| 297 | TABLE[exponent & 15] | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | fn to_bits(self) -> Self::Int { | 
|---|
| 301 | self.to_bits() | 
|---|
| 302 | } | 
|---|
| 303 |  | 
|---|
| 304 | fn classify(self) -> FpCategory { | 
|---|
| 305 | self.classify() | 
|---|
| 306 | } | 
|---|
| 307 | } | 
|---|
| 308 |  | 
|---|
| 309 | impl RawFloat for f64 { | 
|---|
| 310 | type Int = u64; | 
|---|
| 311 |  | 
|---|
| 312 | const INFINITY: Self = Self::INFINITY; | 
|---|
| 313 | const NEG_INFINITY: Self = Self::NEG_INFINITY; | 
|---|
| 314 | const NAN: Self = Self::NAN; | 
|---|
| 315 | const NEG_NAN: Self = -Self::NAN; | 
|---|
| 316 |  | 
|---|
| 317 | const BITS: u32 = 64; | 
|---|
| 318 | const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS; | 
|---|
| 319 | const EXP_MASK: Self::Int = Self::EXP_MASK; | 
|---|
| 320 | const SIG_MASK: Self::Int = Self::MAN_MASK; | 
|---|
| 321 |  | 
|---|
| 322 | const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4; | 
|---|
| 323 | const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23; | 
|---|
| 324 | const SMALLEST_POWER_OF_TEN: i32 = -342; | 
|---|
| 325 |  | 
|---|
| 326 | #[ inline] | 
|---|
| 327 | fn from_u64(v: u64) -> Self { | 
|---|
| 328 | debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH); | 
|---|
| 329 | v as _ | 
|---|
| 330 | } | 
|---|
| 331 |  | 
|---|
| 332 | #[ inline] | 
|---|
| 333 | fn from_u64_bits(v: u64) -> Self { | 
|---|
| 334 | f64::from_bits(v) | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | fn pow10_fast_path(exponent: usize) -> Self { | 
|---|
| 338 | const TABLE: [f64; 32] = [ | 
|---|
| 339 | 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, | 
|---|
| 340 | 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0., | 
|---|
| 341 | ]; | 
|---|
| 342 | TABLE[exponent & 31] | 
|---|
| 343 | } | 
|---|
| 344 |  | 
|---|
| 345 | fn to_bits(self) -> Self::Int { | 
|---|
| 346 | self.to_bits() | 
|---|
| 347 | } | 
|---|
| 348 |  | 
|---|
| 349 | fn classify(self) -> FpCategory { | 
|---|
| 350 | self.classify() | 
|---|
| 351 | } | 
|---|
| 352 | } | 
|---|
| 353 |  | 
|---|