1 | use crate::float::Float; |
2 | use crate::int::{CastInto, DInt, HInt, Int}; |
3 | |
4 | fn mul<F: Float>(a: F, b: F) -> F |
5 | where |
6 | u32: CastInto<F::Int>, |
7 | F::Int: CastInto<u32>, |
8 | i32: CastInto<F::Int>, |
9 | F::Int: CastInto<i32>, |
10 | F::Int: HInt, |
11 | { |
12 | let one = F::Int::ONE; |
13 | let zero = F::Int::ZERO; |
14 | |
15 | let bits = F::BITS; |
16 | let significand_bits = F::SIGNIFICAND_BITS; |
17 | let max_exponent = F::EXPONENT_MAX; |
18 | |
19 | let exponent_bias = F::EXPONENT_BIAS; |
20 | |
21 | let implicit_bit = F::IMPLICIT_BIT; |
22 | let significand_mask = F::SIGNIFICAND_MASK; |
23 | let sign_bit = F::SIGN_MASK as F::Int; |
24 | let abs_mask = sign_bit - one; |
25 | let exponent_mask = F::EXPONENT_MASK; |
26 | let inf_rep = exponent_mask; |
27 | let quiet_bit = implicit_bit >> 1; |
28 | let qnan_rep = exponent_mask | quiet_bit; |
29 | let exponent_bits = F::EXPONENT_BITS; |
30 | |
31 | let a_rep = a.repr(); |
32 | let b_rep = b.repr(); |
33 | |
34 | let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); |
35 | let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); |
36 | let product_sign = (a_rep ^ b_rep) & sign_bit; |
37 | |
38 | let mut a_significand = a_rep & significand_mask; |
39 | let mut b_significand = b_rep & significand_mask; |
40 | let mut scale = 0; |
41 | |
42 | // Detect if a or b is zero, denormal, infinity, or NaN. |
43 | if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() |
44 | || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() |
45 | { |
46 | let a_abs = a_rep & abs_mask; |
47 | let b_abs = b_rep & abs_mask; |
48 | |
49 | // NaN + anything = qNaN |
50 | if a_abs > inf_rep { |
51 | return F::from_repr(a_rep | quiet_bit); |
52 | } |
53 | // anything + NaN = qNaN |
54 | if b_abs > inf_rep { |
55 | return F::from_repr(b_rep | quiet_bit); |
56 | } |
57 | |
58 | if a_abs == inf_rep { |
59 | if b_abs != zero { |
60 | // infinity * non-zero = +/- infinity |
61 | return F::from_repr(a_abs | product_sign); |
62 | } else { |
63 | // infinity * zero = NaN |
64 | return F::from_repr(qnan_rep); |
65 | } |
66 | } |
67 | |
68 | if b_abs == inf_rep { |
69 | if a_abs != zero { |
70 | // infinity * non-zero = +/- infinity |
71 | return F::from_repr(b_abs | product_sign); |
72 | } else { |
73 | // infinity * zero = NaN |
74 | return F::from_repr(qnan_rep); |
75 | } |
76 | } |
77 | |
78 | // zero * anything = +/- zero |
79 | if a_abs == zero { |
80 | return F::from_repr(product_sign); |
81 | } |
82 | |
83 | // anything * zero = +/- zero |
84 | if b_abs == zero { |
85 | return F::from_repr(product_sign); |
86 | } |
87 | |
88 | // one or both of a or b is denormal, the other (if applicable) is a |
89 | // normal number. Renormalize one or both of a and b, and set scale to |
90 | // include the necessary exponent adjustment. |
91 | if a_abs < implicit_bit { |
92 | let (exponent, significand) = F::normalize(a_significand); |
93 | scale += exponent; |
94 | a_significand = significand; |
95 | } |
96 | |
97 | if b_abs < implicit_bit { |
98 | let (exponent, significand) = F::normalize(b_significand); |
99 | scale += exponent; |
100 | b_significand = significand; |
101 | } |
102 | } |
103 | |
104 | // Or in the implicit significand bit. (If we fell through from the |
105 | // denormal path it was already set by normalize( ), but setting it twice |
106 | // won't hurt anything.) |
107 | a_significand |= implicit_bit; |
108 | b_significand |= implicit_bit; |
109 | |
110 | // Get the significand of a*b. Before multiplying the significands, shift |
111 | // one of them left to left-align it in the field. Thus, the product will |
112 | // have (exponentBits + 2) integral digits, all but two of which must be |
113 | // zero. Normalizing this result is just a conditional left-shift by one |
114 | // and bumping the exponent accordingly. |
115 | let (mut product_low, mut product_high) = a_significand |
116 | .widen_mul(b_significand << exponent_bits) |
117 | .lo_hi(); |
118 | |
119 | let a_exponent_i32: i32 = a_exponent.cast(); |
120 | let b_exponent_i32: i32 = b_exponent.cast(); |
121 | let mut product_exponent: i32 = a_exponent_i32 |
122 | .wrapping_add(b_exponent_i32) |
123 | .wrapping_add(scale) |
124 | .wrapping_sub(exponent_bias as i32); |
125 | |
126 | // Normalize the significand, adjust exponent if needed. |
127 | if (product_high & implicit_bit) != zero { |
128 | product_exponent = product_exponent.wrapping_add(1); |
129 | } else { |
130 | product_high = (product_high << 1) | (product_low >> (bits - 1)); |
131 | product_low <<= 1; |
132 | } |
133 | |
134 | // If we have overflowed the type, return +/- infinity. |
135 | if product_exponent >= max_exponent as i32 { |
136 | return F::from_repr(inf_rep | product_sign); |
137 | } |
138 | |
139 | if product_exponent <= 0 { |
140 | // Result is denormal before rounding |
141 | // |
142 | // If the result is so small that it just underflows to zero, return |
143 | // a zero of the appropriate sign. Mathematically there is no need to |
144 | // handle this case separately, but we make it a special case to |
145 | // simplify the shift logic. |
146 | let shift = one.wrapping_sub(product_exponent.cast()).cast(); |
147 | if shift >= bits { |
148 | return F::from_repr(product_sign); |
149 | } |
150 | |
151 | // Otherwise, shift the significand of the result so that the round |
152 | // bit is the high bit of productLo. |
153 | if shift < bits { |
154 | let sticky = product_low << (bits - shift); |
155 | product_low = product_high << (bits - shift) | product_low >> shift | sticky; |
156 | product_high >>= shift; |
157 | } else if shift < (2 * bits) { |
158 | let sticky = product_high << (2 * bits - shift) | product_low; |
159 | product_low = product_high >> (shift - bits) | sticky; |
160 | product_high = zero; |
161 | } else { |
162 | product_high = zero; |
163 | } |
164 | } else { |
165 | // Result is normal before rounding; insert the exponent. |
166 | product_high &= significand_mask; |
167 | product_high |= product_exponent.cast() << significand_bits; |
168 | } |
169 | |
170 | // Insert the sign of the result: |
171 | product_high |= product_sign; |
172 | |
173 | // Final rounding. The final result may overflow to infinity, or underflow |
174 | // to zero, but those are the correct results in those cases. We use the |
175 | // default IEEE-754 round-to-nearest, ties-to-even rounding mode. |
176 | if product_low > sign_bit { |
177 | product_high += one; |
178 | } |
179 | |
180 | if product_low == sign_bit { |
181 | product_high += product_high & one; |
182 | } |
183 | |
184 | F::from_repr(product_high) |
185 | } |
186 | |
187 | intrinsics! { |
188 | #[avr_skip] |
189 | #[aapcs_on_arm] |
190 | #[arm_aeabi_alias = __aeabi_fmul] |
191 | pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { |
192 | mul(a, b) |
193 | } |
194 | |
195 | #[avr_skip] |
196 | #[aapcs_on_arm] |
197 | #[arm_aeabi_alias = __aeabi_dmul] |
198 | pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { |
199 | mul(a, b) |
200 | } |
201 | |
202 | #[cfg (target_arch = "arm" )] |
203 | pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { |
204 | a * b |
205 | } |
206 | |
207 | #[cfg (target_arch = "arm" )] |
208 | pub extern "C" fn __muldf3vfp(a: f64, b: f64) -> f64 { |
209 | a * b |
210 | } |
211 | } |
212 | |