// Adapted from https://github.com/Alexhuszagh/rust-lexical.

// FLOAT TYPE

use super::num::*;
use super::rounding::*;
use super::shift::*;

/// Extended precision floating-point type.
///
/// Private implementation, exposed only for testing purposes.
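///
/// The represented value is `mant * 2^exp`; `normalize`, `mul`, and the
/// conversions below all rely on this interpretation.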
#[doc(hidden)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct ExtendedFloat {
    /// Mantissa for the extended-precision float.
    pub mant: u64,
    /// Binary exponent for the extended-precision float.
    pub exp: i32,
}

impl ExtendedFloat {
    // PROPERTIES

    // OPERATIONS

    /// Multiply two normalized extended-precision floats, as if by `a*b`.
    ///
    /// The precision is maximal when the numbers are normalized; however,
    /// decent precision will occur as long as both values have their high
    /// bits set. The result is not normalized.
    ///
    /// Algorithm:
    /// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
    /// 2. Normalization of the result (not done here).
    /// 3. Addition of exponents.
    pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
        // Logic check, values must be decently normalized prior to multiplication.
        debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));

        // Extract high-and-low masks.
        let ah = self.mant >> u64::HALF;
        let al = self.mant & u64::LOMASK;
        let bh = b.mant >> u64::HALF;
        let bl = b.mant & u64::LOMASK;

        // Get our products.
        let ah_bl = ah * bl;
        let al_bh = al * bh;
        let al_bl = al * bl;
        let ah_bh = ah * bh;

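        // With ah/al and bh/bl the 32-bit halves of each mantissa (so every
        // partial product fits in a u64), the full 128-bit product is
        //   a*b = ah_bh * 2^64 + (ah_bl + al_bh) * 2^32 + al_bl.
        // Only the upper 64 bits are kept as the new mantissa, which divides
        // the mantissa product by 2^64; adding `u64::FULL` (64) to the exponent
        // below compensates. `tmp` sums the low halves of the middle terms and
        // the high half of `al_bl`, with half added so the bits shifted out
        // round to nearest.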
        let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
        // Round to nearest: add half of the low bits before they are shifted out.
        tmp += 1 << (u64::HALF - 1);

        ExtendedFloat {
            mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
            exp: self.exp + b.exp + u64::FULL,
        }
    }

    /// Multiply in-place, as if by `a*b`.
    ///
    /// The result is not normalized.
    #[inline]
    pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
        *self = self.mul(b);
    }

    // NORMALIZE

    /// Normalize floating-point number.
    ///
    /// Shift the mantissa so the number of leading zeros is 0, or the value
    /// itself is 0.
    ///
    /// Get the number of bits shifted.
    #[inline]
    pub(crate) fn normalize(&mut self) -> u32 {
        // Note:
        // Using the ctlz intrinsic via leading_zeros is way faster (~10x)
        // than shifting 1-bit at a time, via while loop, and also way
        // faster (~2x) than an unrolled loop that checks at 32, 16, 8,
        // 4, 2, and 1 bit.
        //
        // Using a modulus of pow2 (which will get optimized to a bitwise
        // and with 0x3F or faster) is slightly slower than an if/then;
        // however, removing the if/then will likely optimize more branched
        // code as it removes conditional logic.

        // Calculate the number of leading zeros, and then zero-out
        // any overflowing bits, to avoid shl overflow when self.mant == 0.
        let shift = if self.mant == 0 {
            0
        } else {
            self.mant.leading_zeros()
        };
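        // `shl` (from super::shift) shifts the mantissa left and lowers the
        // exponent by the same amount, so the represented value is unchanged.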
        shl(self, shift as i32);
        shift
    }

    // ROUND

    /// Lossy round floating-point number to native mantissa boundaries.
    #[inline]
    pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
    where
        F: Float,
        Algorithm: FnOnce(&mut ExtendedFloat, i32),
    {
        round_to_native::<F, _>(self, algorithm);
    }

    // FROM

    /// Create extended float from native float.
    #[inline]
    pub fn from_float<F: Float>(f: F) -> ExtendedFloat {
        from_float(f)
    }

    // INTO

    /// Convert into default-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_nearest_tie_even);
        into_float(self)
    }

    /// Convert into downward-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_downward);
        into_float(self)
    }
}

// FROM FLOAT

// Convert a native float into an `ExtendedFloat`.
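// The result represents the same value as `f`, i.e. `mant * 2^exp`, built from
// the mantissa and shifted, unbiased exponent exposed by the `Float` trait in
// `super::num`.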
#[inline]
pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
where
    F: Float,
{
    ExtendedFloat {
        mant: u64::as_cast(f.mantissa()),
        exp: f.exponent(),
    }
}

// INTO FLOAT

// Export extended-precision float to native float.
//
// The extended-precision float must be in native float representation,
// with overflow/underflow appropriately handled.
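// (In practice this means the value has already been rounded, e.g. via
// `round_to_native`, so the mantissa fits in the native mantissa width plus
// the hidden bit.)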
#[inline]
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
where
    F: Float,
{
    // Export floating-point number.
    if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
        // sub-denormal, underflow
        F::ZERO
    } else if fp.exp >= F::MAX_EXPONENT {
        // overflow
        F::from_bits(F::INFINITY_BITS)
    } else {
        // calculate the exp and fraction bits, and return a float from bits.
        let exp: u64;
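        // A subnormal result (hidden bit not set even at the smallest
        // representable exponent) stores an exponent field of zero;
        // otherwise, the binary exponent is re-biased into IEEE-754 form.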
        if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
            exp = 0;
        } else {
            exp = (fp.exp + F::EXPONENT_BIAS) as u64;
        }
        let exp = exp << F::MANTISSA_SIZE;
        let mant = fp.mant & F::MANTISSA_MASK.as_u64();
        F::from_bits(F::Unsigned::as_cast(mant | exp))
    }
}
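
// A minimal usage sketch of the API above, assuming the `Float` trait in
// `super::num` is implemented for `f64` with rust-lexical's semantics
// (`mantissa()` includes the hidden bit, the value equals
// `mantissa() * 2^exponent()`) and that `round_to_native` normalizes its
// input before rounding.
#[cfg(test)]
mod example_tests {
    use super::*;

    #[test]
    fn mul_and_round_trip() {
        // Round-trip a value through the extended representation.
        let fp = ExtendedFloat::from_float(3.5f64);
        assert_eq!(fp.into_float::<f64>(), 3.5f64);

        // Multiply 2.0 by 4.0: normalize both operands first, since `mul`
        // assumes the high bits of each mantissa are set.
        let mut a = ExtendedFloat::from_float(2.0f64);
        let mut b = ExtendedFloat::from_float(4.0f64);
        a.normalize();
        b.normalize();
        let product = a.mul(&b);
        assert_eq!(product.into_float::<f64>(), 8.0f64);
    }
}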