1use crate::float::Float;
2use crate::int::{CastInto, DInt, HInt, Int, MinInt};
3
4fn mul<F: Float>(a: F, b: F) -> F
5where
6 u32: CastInto<F::Int>,
7 F::Int: CastInto<u32>,
8 i32: CastInto<F::Int>,
9 F::Int: CastInto<i32>,
10 F::Int: HInt,
11{
12 let one = F::Int::ONE;
13 let zero = F::Int::ZERO;
14
15 let bits = F::BITS;
16 let significand_bits = F::SIG_BITS;
17 let max_exponent = F::EXP_SAT;
18
19 let exponent_bias = F::EXP_BIAS;
20
21 let implicit_bit = F::IMPLICIT_BIT;
22 let significand_mask = F::SIG_MASK;
23 let sign_bit = F::SIGN_MASK;
24 let abs_mask = sign_bit - one;
25 let exponent_mask = F::EXP_MASK;
26 let inf_rep = exponent_mask;
27 let quiet_bit = implicit_bit >> 1;
28 let qnan_rep = exponent_mask | quiet_bit;
29 let exponent_bits = F::EXP_BITS;
30
31 let a_rep = a.to_bits();
32 let b_rep = b.to_bits();
33
34 let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
35 let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
36 let product_sign = (a_rep ^ b_rep) & sign_bit;
37
38 let mut a_significand = a_rep & significand_mask;
39 let mut b_significand = b_rep & significand_mask;
40 let mut scale = 0;
41
42 // Detect if a or b is zero, denormal, infinity, or NaN.
43 if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
44 || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
45 {
46 let a_abs = a_rep & abs_mask;
47 let b_abs = b_rep & abs_mask;
48
49 // NaN + anything = qNaN
50 if a_abs > inf_rep {
51 return F::from_bits(a_rep | quiet_bit);
52 }
53 // anything + NaN = qNaN
54 if b_abs > inf_rep {
55 return F::from_bits(b_rep | quiet_bit);
56 }
57
58 if a_abs == inf_rep {
59 if b_abs != zero {
60 // infinity * non-zero = +/- infinity
61 return F::from_bits(a_abs | product_sign);
62 } else {
63 // infinity * zero = NaN
64 return F::from_bits(qnan_rep);
65 }
66 }
67
68 if b_abs == inf_rep {
69 if a_abs != zero {
70 // infinity * non-zero = +/- infinity
71 return F::from_bits(b_abs | product_sign);
72 } else {
73 // infinity * zero = NaN
74 return F::from_bits(qnan_rep);
75 }
76 }
77
78 // zero * anything = +/- zero
79 if a_abs == zero {
80 return F::from_bits(product_sign);
81 }
82
83 // anything * zero = +/- zero
84 if b_abs == zero {
85 return F::from_bits(product_sign);
86 }
87
88 // one or both of a or b is denormal, the other (if applicable) is a
89 // normal number. Renormalize one or both of a and b, and set scale to
90 // include the necessary exponent adjustment.
91 if a_abs < implicit_bit {
92 let (exponent, significand) = F::normalize(a_significand);
93 scale += exponent;
94 a_significand = significand;
95 }
96
97 if b_abs < implicit_bit {
98 let (exponent, significand) = F::normalize(b_significand);
99 scale += exponent;
100 b_significand = significand;
101 }
102 }
103
104 // Or in the implicit significand bit. (If we fell through from the
105 // denormal path it was already set by normalize( ), but setting it twice
106 // won't hurt anything.)
107 a_significand |= implicit_bit;
108 b_significand |= implicit_bit;
109
110 // Get the significand of a*b. Before multiplying the significands, shift
111 // one of them left to left-align it in the field. Thus, the product will
112 // have (exponentBits + 2) integral digits, all but two of which must be
113 // zero. Normalizing this result is just a conditional left-shift by one
114 // and bumping the exponent accordingly.
115 let (mut product_low, mut product_high) = a_significand
116 .widen_mul(b_significand << exponent_bits)
117 .lo_hi();
118
119 let a_exponent_i32: i32 = a_exponent.cast();
120 let b_exponent_i32: i32 = b_exponent.cast();
121 let mut product_exponent: i32 = a_exponent_i32
122 .wrapping_add(b_exponent_i32)
123 .wrapping_add(scale)
124 .wrapping_sub(exponent_bias as i32);
125
126 // Normalize the significand, adjust exponent if needed.
127 if (product_high & implicit_bit) != zero {
128 product_exponent = product_exponent.wrapping_add(1);
129 } else {
130 product_high = (product_high << 1) | (product_low >> (bits - 1));
131 product_low <<= 1;
132 }
133
134 // If we have overflowed the type, return +/- infinity.
135 if product_exponent >= max_exponent as i32 {
136 return F::from_bits(inf_rep | product_sign);
137 }
138
139 if product_exponent <= 0 {
140 // Result is denormal before rounding
141 //
142 // If the result is so small that it just underflows to zero, return
143 // a zero of the appropriate sign. Mathematically there is no need to
144 // handle this case separately, but we make it a special case to
145 // simplify the shift logic.
146 let shift = one.wrapping_sub(product_exponent.cast()).cast();
147 if shift >= bits {
148 return F::from_bits(product_sign);
149 }
150
151 // Otherwise, shift the significand of the result so that the round
152 // bit is the high bit of `product_low`.
153 // Ensure one of the non-highest bits in `product_low` is set if the shifted out bit are
154 // not all zero so that the result is correctly rounded below.
155 let sticky = product_low << (bits - shift) != zero;
156 product_low =
157 (product_high << (bits - shift)) | (product_low >> shift) | (sticky as u32).cast();
158 product_high >>= shift;
159 } else {
160 // Result is normal before rounding; insert the exponent.
161 product_high &= significand_mask;
162 product_high |= product_exponent.cast() << significand_bits;
163 }
164
165 // Insert the sign of the result:
166 product_high |= product_sign;
167
168 // Final rounding. The final result may overflow to infinity, or underflow
169 // to zero, but those are the correct results in those cases. We use the
170 // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
171 if product_low > sign_bit {
172 product_high += one;
173 }
174
175 if product_low == sign_bit {
176 product_high += product_high & one;
177 }
178
179 F::from_bits(product_high)
180}
181
182intrinsics! {
183 #[avr_skip]
184 #[aapcs_on_arm]
185 #[arm_aeabi_alias = __aeabi_fmul]
186 pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
187 mul(a, b)
188 }
189
190 #[avr_skip]
191 #[aapcs_on_arm]
192 #[arm_aeabi_alias = __aeabi_dmul]
193 pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 {
194 mul(a, b)
195 }
196
197 #[ppc_alias = __mulkf3]
198 #[cfg(f128_enabled)]
199 pub extern "C" fn __multf3(a: f128, b: f128) -> f128 {
200 mul(a, b)
201 }
202}
203