1use crate::float::Float;
2use crate::int::{CastInto, DInt, HInt, Int};
3
4fn mul<F: Float>(a: F, b: F) -> F
5where
6 u32: CastInto<F::Int>,
7 F::Int: CastInto<u32>,
8 i32: CastInto<F::Int>,
9 F::Int: CastInto<i32>,
10 F::Int: HInt,
11{
12 let one = F::Int::ONE;
13 let zero = F::Int::ZERO;
14
15 let bits = F::BITS;
16 let significand_bits = F::SIGNIFICAND_BITS;
17 let max_exponent = F::EXPONENT_MAX;
18
19 let exponent_bias = F::EXPONENT_BIAS;
20
21 let implicit_bit = F::IMPLICIT_BIT;
22 let significand_mask = F::SIGNIFICAND_MASK;
23 let sign_bit = F::SIGN_MASK as F::Int;
24 let abs_mask = sign_bit - one;
25 let exponent_mask = F::EXPONENT_MASK;
26 let inf_rep = exponent_mask;
27 let quiet_bit = implicit_bit >> 1;
28 let qnan_rep = exponent_mask | quiet_bit;
29 let exponent_bits = F::EXPONENT_BITS;
30
31 let a_rep = a.repr();
32 let b_rep = b.repr();
33
34 let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
35 let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
36 let product_sign = (a_rep ^ b_rep) & sign_bit;
37
38 let mut a_significand = a_rep & significand_mask;
39 let mut b_significand = b_rep & significand_mask;
40 let mut scale = 0;
41
42 // Detect if a or b is zero, denormal, infinity, or NaN.
43 if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
44 || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
45 {
46 let a_abs = a_rep & abs_mask;
47 let b_abs = b_rep & abs_mask;
48
49 // NaN + anything = qNaN
50 if a_abs > inf_rep {
51 return F::from_repr(a_rep | quiet_bit);
52 }
53 // anything + NaN = qNaN
54 if b_abs > inf_rep {
55 return F::from_repr(b_rep | quiet_bit);
56 }
57
58 if a_abs == inf_rep {
59 if b_abs != zero {
60 // infinity * non-zero = +/- infinity
61 return F::from_repr(a_abs | product_sign);
62 } else {
63 // infinity * zero = NaN
64 return F::from_repr(qnan_rep);
65 }
66 }
67
68 if b_abs == inf_rep {
69 if a_abs != zero {
70 // infinity * non-zero = +/- infinity
71 return F::from_repr(b_abs | product_sign);
72 } else {
73 // infinity * zero = NaN
74 return F::from_repr(qnan_rep);
75 }
76 }
77
78 // zero * anything = +/- zero
79 if a_abs == zero {
80 return F::from_repr(product_sign);
81 }
82
83 // anything * zero = +/- zero
84 if b_abs == zero {
85 return F::from_repr(product_sign);
86 }
87
88 // one or both of a or b is denormal, the other (if applicable) is a
89 // normal number. Renormalize one or both of a and b, and set scale to
90 // include the necessary exponent adjustment.
91 if a_abs < implicit_bit {
92 let (exponent, significand) = F::normalize(a_significand);
93 scale += exponent;
94 a_significand = significand;
95 }
96
97 if b_abs < implicit_bit {
98 let (exponent, significand) = F::normalize(b_significand);
99 scale += exponent;
100 b_significand = significand;
101 }
102 }
103
104 // Or in the implicit significand bit. (If we fell through from the
105 // denormal path it was already set by normalize( ), but setting it twice
106 // won't hurt anything.)
107 a_significand |= implicit_bit;
108 b_significand |= implicit_bit;
109
110 // Get the significand of a*b. Before multiplying the significands, shift
111 // one of them left to left-align it in the field. Thus, the product will
112 // have (exponentBits + 2) integral digits, all but two of which must be
113 // zero. Normalizing this result is just a conditional left-shift by one
114 // and bumping the exponent accordingly.
115 let (mut product_low, mut product_high) = a_significand
116 .widen_mul(b_significand << exponent_bits)
117 .lo_hi();
118
119 let a_exponent_i32: i32 = a_exponent.cast();
120 let b_exponent_i32: i32 = b_exponent.cast();
121 let mut product_exponent: i32 = a_exponent_i32
122 .wrapping_add(b_exponent_i32)
123 .wrapping_add(scale)
124 .wrapping_sub(exponent_bias as i32);
125
126 // Normalize the significand, adjust exponent if needed.
127 if (product_high & implicit_bit) != zero {
128 product_exponent = product_exponent.wrapping_add(1);
129 } else {
130 product_high = (product_high << 1) | (product_low >> (bits - 1));
131 product_low <<= 1;
132 }
133
134 // If we have overflowed the type, return +/- infinity.
135 if product_exponent >= max_exponent as i32 {
136 return F::from_repr(inf_rep | product_sign);
137 }
138
139 if product_exponent <= 0 {
140 // Result is denormal before rounding
141 //
142 // If the result is so small that it just underflows to zero, return
143 // a zero of the appropriate sign. Mathematically there is no need to
144 // handle this case separately, but we make it a special case to
145 // simplify the shift logic.
146 let shift = one.wrapping_sub(product_exponent.cast()).cast();
147 if shift >= bits {
148 return F::from_repr(product_sign);
149 }
150
151 // Otherwise, shift the significand of the result so that the round
152 // bit is the high bit of productLo.
153 if shift < bits {
154 let sticky = product_low << (bits - shift);
155 product_low = product_high << (bits - shift) | product_low >> shift | sticky;
156 product_high >>= shift;
157 } else if shift < (2 * bits) {
158 let sticky = product_high << (2 * bits - shift) | product_low;
159 product_low = product_high >> (shift - bits) | sticky;
160 product_high = zero;
161 } else {
162 product_high = zero;
163 }
164 } else {
165 // Result is normal before rounding; insert the exponent.
166 product_high &= significand_mask;
167 product_high |= product_exponent.cast() << significand_bits;
168 }
169
170 // Insert the sign of the result:
171 product_high |= product_sign;
172
173 // Final rounding. The final result may overflow to infinity, or underflow
174 // to zero, but those are the correct results in those cases. We use the
175 // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
176 if product_low > sign_bit {
177 product_high += one;
178 }
179
180 if product_low == sign_bit {
181 product_high += product_high & one;
182 }
183
184 F::from_repr(product_high)
185}
186
187intrinsics! {
188 #[avr_skip]
189 #[aapcs_on_arm]
190 #[arm_aeabi_alias = __aeabi_fmul]
191 pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
192 mul(a, b)
193 }
194
195 #[avr_skip]
196 #[aapcs_on_arm]
197 #[arm_aeabi_alias = __aeabi_dmul]
198 pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 {
199 mul(a, b)
200 }
201
202 #[cfg(target_arch = "arm")]
203 pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 {
204 a * b
205 }
206
207 #[cfg(target_arch = "arm")]
208 pub extern "C" fn __muldf3vfp(a: f64, b: f64) -> f64 {
209 a * b
210 }
211}
212