/* SPDX-License-Identifier: MIT */
/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */

use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
use super::{CastFrom, CastInto, Float, Int, MinInt};

/// Fused multiply add (f64)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
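///
/// # Example
///
/// An illustrative sketch, assuming this crate is built under the name `libm`: because the
/// product feeds the addition at full precision, `fma` can recover the rounding error of a
/// non-fused multiplication.
///
/// ```
/// let a = 1.0 + 2.0_f64.powi(-27);
/// let p = a * a; // rounded product; the exact 2^-54 term is lost
/// assert_eq!(a * a - p, 0.0); // the non-fused difference cannot observe the error
/// assert_eq!(libm::fma(a, a, -p), 2.0_f64.powi(-54)); // the fused form recovers it
/// ```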
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
    select_implementation! {
        name: fma,
        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
        args: x, y, z,
    }

    fma_round(x, y, z, Round::Nearest).val
}

/// Fused multiply add (f128)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg(f128_enabled)]
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
    fma_round(x, y, z, Round::Nearest).val
}

/// Fused multiply-add that works when there is not a larger float size available. Computes
/// `(x * y) + z`.
#[inline]
pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
where
    F: Float,
    F: CastFrom<F::SignedInt>,
    F: CastFrom<i8>,
    F::Int: HInt,
    u32: CastInto<F::Int>,
{
    let one = IntTy::<F>::ONE;
    let zero = IntTy::<F>::ZERO;

    // Normalize such that the top of the mantissa is zero and we have a guard bit.
    let nx = Norm::from_float(x);
    let ny = Norm::from_float(y);
    let nz = Norm::from_float(z);

    if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
        // `x` or `y` is zero, NaN, or infinity, so the non-fused operations produce the
        // correct result; defer to them.
        return FpResult::ok(x * y + z);
    }

    if nz.is_zero_nan_inf() {
        if nz.is_zero() {
            // Empty add component means we only need to multiply.
            return FpResult::ok(x * y);
        }
        // `z` is NaN or infinity, which sets the result.
        return FpResult::ok(z);
    }

    // multiply: r = x * y
    let zhi: F::Int;
    let zlo: F::Int;
    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
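    // Each normalized significand has its top set bit at `SIG_BITS + 1` (implicit bit plus
    // the guard-bit shift), so the double-wide product lies in
    // `[2^(2 * SIG_BITS + 2), 2^(2 * SIG_BITS + 4))` and leaves `rhi` ample headroom for
    // the addition below.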

    // Exponent result of multiplication
    let mut e: i32 = nx.e + ny.e;
    // Needed shift to align `z` to the multiplication result
    let mut d: i32 = nz.e - e;
    let sbits = F::BITS as i32;

    // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
    if d > 0 {
        // The magnitude of `z` is larger than `x * y`
        if d < sbits {
            // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift
            // it into `(zhi, zlo)`. No exponent adjustment necessary.
            zlo = nz.m << d;
            zhi = nz.m >> (sbits - d);
        } else {
            // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
            // as a shift by `sbits`).
            zlo = zero;
            zhi = nz.m;
            d -= sbits;

            // `z`'s exponent is large enough that it now needs to be taken into account.
            e = nz.e - sbits;

            if d == 0 {
                // Exactly `sbits`, nothing to do
            } else if d < sbits {
                // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y`,
                // computing the sticky bit from the bits of `rlo` that are about to be
                // shifted out (it must use the pre-shift value of `rlo`).
                let sticky = IntTy::<F>::from((rlo << (sbits - d)) != zero);
                rlo = (rhi << (sbits - d)) | (rlo >> d) | sticky;
                rhi = rhi >> d;
            } else {
                // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set
                // the sticky bit.
                rlo = one;
                rhi = zero;
            }
        }
    } else {
        // `z`'s magnitude once shifted fits entirely within `zlo`
        zhi = zero;
        d = -d;
        if d == 0 {
            // No shift needed
            zlo = nz.m;
        } else if d < sbits {
            // Shift s.t. `nz.m` fits into `zlo`
            let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
            zlo = (nz.m >> d) | sticky;
        } else {
            // Would be entirely shifted out, only set the sticky bit
            zlo = one;
        }
    }
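    // Invariant at this point: `(rhi, rlo)` and `(zhi, zlo)` both represent magnitudes
    // scaled by `2^e`, with any discarded low bits folded into sticky bits, so they can be
    // combined with plain double-wide integer arithmetic.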

    /* addition */

    let mut neg = nx.neg ^ ny.neg;
    let samesign: bool = !neg ^ nz.neg;
    let mut rhi_nonzero = true;

    if samesign {
        // r += z
        rlo = rlo.wrapping_add(zlo);
        rhi += zhi + IntTy::<F>::from(rlo < zlo);
    } else {
        // r -= z
        let (res, borrow) = rlo.overflowing_sub(zlo);
        rlo = res;
        rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
        if (rhi >> (F::BITS - 1)) != zero {
            rlo = rlo.signed().wrapping_neg().unsigned();
            rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
            neg = !neg;
        }
        rhi_nonzero = rhi != zero;
    }
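    // `neg` now carries the sign of the exact sum and `(rhi, rlo)` its magnitude; everything
    // shifted out along the way survives only as sticky bits, which is all the final rounding
    // below needs.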

    /* Construct result */

    // Shift result into `rhi`, left-aligned. Last bit is sticky
    if rhi_nonzero {
        // `d` > 0, need to shift both `rhi` and `rlo` into result
        e += sbits;
        d = rhi.leading_zeros() as i32 - 1;
        rhi = (rhi << d) | (rlo >> (sbits - d));
        // Update sticky
        rhi |= IntTy::<F>::from((rlo << d) != zero);
    } else if rlo != zero {
        // `rhi` is zero, `rlo` is the entire result and needs to be shifted
        d = rlo.leading_zeros() as i32 - 1;
        if d < 0 {
            // Shift and set sticky
            rhi = (rlo >> 1) | (rlo & one);
        } else {
            rhi = rlo << d;
        }
    } else {
        // exact +/- 0.0
        return FpResult::ok(x * y + z);
    }

    e -= d;

    // Use int->float conversion to populate the significand.
    // `i` is in `[1 << (BITS - 2), (1 << (BITS - 1)) - 1]`
    let mut i: F::SignedInt = rhi.signed();

    if neg {
        i = -i;
    }

    // `|r|` is in `[0x1p62,0x1p63]` for `f64`
    let mut r: F = F::cast_from_lossy(i);

    /* Account for subnormal and rounding */

    // Biased exponent for the maximum value of `r`
    let max_pow = F::BITS - 1 + F::EXP_BIAS;

    let mut status = Status::OK;

    if e < -(max_pow as i32 - 2) {
        // Result is subnormal before rounding
        if e == -(max_pow as i32 - 1) {
            let mut c = F::from_parts(false, max_pow, zero);
            if neg {
                c = -c;
            }

            if r == c {
                // Min normal after rounding.
                status.set_underflow(true);
                r = F::MIN_POSITIVE_NORMAL.copysign(r);
                return FpResult::new(r, status);
            }

            if (rhi << (F::SIG_BITS + 1)) != zero {
                // Account for truncated bits. One bit will be lost in the `scalbn` call, add
                // another top bit to avoid double rounding if inexact.
                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
                i = iu.signed();

                if neg {
                    i = -i;
                }

                r = F::cast_from_lossy(i);

                // Remove the top bit
                r = F::cast_from(2i8) * r - c;
                status.set_underflow(true);
            }
        } else {
            // Only round once when scaled
            d = F::EXP_BITS as i32 - 1;
            let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
            i = (((rhi >> d) | sticky) << d).signed();

            if neg {
                i = -i;
            }

            r = F::cast_from_lossy(i);
        }
    }

    // Use our exponent to scale the final value.
    FpResult::new(super::generic::scalbn(r, e), status)
}

/// Representation of `F` that has handled subnormals.
#[derive(Clone, Copy, Debug)]
struct Norm<F: Float> {
    /// Normalized significand with one guard bit, unsigned.
    m: F::Int,
    /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa
    /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`.
    e: i32,
    neg: bool,
}

impl<F: Float> Norm<F> {
    /// Unbias the exponent and account for the mantissa's precision, including the guard bit.
    const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;

    /// A normalized exponent equal to this value means the input had a saturated exponent
    /// (infinity or NaN); a strictly greater value means the input was zero, whose exponent
    /// we adjust to exceed this threshold.
    const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;

    fn from_float(x: F) -> Self {
        let mut ix = x.to_bits();
        let mut e = x.ex() as i32;
        let neg = x.is_sign_negative();
        if e == 0 {
            // Normalize subnormals by multiplication
            let scale_i = F::BITS - 1;
            let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
            let scaled = x * scale_f;
            ix = scaled.to_bits();
            e = scaled.ex() as i32;
            e = if e == 0 {
                // If the exponent is still zero, the input was zero. Artificially set this value
                // such that the final `e` will exceed `ZERO_INF_NAN`.
                1 << F::EXP_BITS
            } else {
                // Otherwise, account for the scaling we just did.
                e - scale_i as i32
            };
        }

        e -= Self::EXP_UNBIAS as i32;

        // Absolute value, set the implicit bit, and shift to create a guard bit
        ix &= F::SIG_MASK;
        ix |= F::IMPLICIT_BIT;
        ix <<= 1;

        Self { m: ix, e, neg }
    }

    /// True if the value was zero, infinity, or NaN.
    fn is_zero_nan_inf(self) -> bool {
        self.e >= Self::ZERO_INF_NAN as i32
    }

    /// True if the value was zero.
    fn is_zero(self) -> bool {
        // The only exponent that strictly exceeds this value is our sentinel value for zero.
        self.e > Self::ZERO_INF_NAN as i32
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Test the generic `fma_round` algorithm for a given float.
    fn spec_test<F>()
    where
        F: Float,
        F: CastFrom<F::SignedInt>,
        F: CastFrom<i8>,
        F::Int: HInt,
        u32: CastInto<F::Int>,
    {
        let x = F::from_bits(F::Int::ONE);
        let y = F::from_bits(F::Int::ONE);
        let z = F::ZERO;

        let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;

        // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
        // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
        // exact result"
        assert_biteq!(fma(x, y, z), F::ZERO);
        assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
        assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
        assert_biteq!(fma(-x, -y, z), F::ZERO);
    }
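
    /// Illustrative check (not from the original suite) of the invariant documented on
    /// `Norm`: `m * 2^e == x`, with `1.0` normalizing to `m = 1 << 53` and `e = -53`.
    #[test]
    fn norm_example_f64() {
        let n = Norm::<f64>::from_float(1.0);
        assert_eq!(n.m, 1u64 << 53);
        assert_eq!(n.e, -53);
        assert!(!n.neg);
    }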

    #[test]
    fn spec_test_f32() {
        spec_test::<f32>();
    }

    #[test]
    fn spec_test_f64() {
        spec_test::<f64>();

        let expect_underflow = [
            (
                hf64!("0x1.0p-1070"),
                hf64!("0x1.0p-1070"),
                hf64!("0x1.ffffffffffffp-1023"),
                hf64!("0x0.ffffffffffff8p-1022"),
            ),
            (
                // FIXME: we raise underflow but this should only be inexact (based on C and
                // `rustc_apfloat`).
                hf64!("0x1.0p-1070"),
                hf64!("0x1.0p-1070"),
                hf64!("-0x1.0p-1022"),
                hf64!("-0x1.0p-1022"),
            ),
        ];

        for (x, y, z, res) in expect_underflow {
            let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
            assert_biteq!(val, res);
            assert_eq!(status, Status::UNDERFLOW);
        }
    }

    #[test]
    #[cfg(f128_enabled)]
    fn spec_test_f128() {
        spec_test::<f128>();
    }

    #[test]
    fn fma_segfault() {
        // These two inputs used to cause `fma` to segfault in release mode due to overflow:
        assert_eq!(
            fma(
                -0.0000000000000002220446049250313,
                -0.0000000000000002220446049250313,
                -0.0000000000000002220446049250313
            ),
            -0.00000000000000022204460492503126,
        );

        let result = fma(-0.992, -0.992, -0.992);
        // Force rounding to the storage format on x87 to prevent spurious errors.
        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
        let result = force_eval!(result);
        assert_eq!(result, -0.007936000000000007);
    }

    #[test]
    fn fma_sbb() {
        assert_eq!(
            fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
            -3991680619069439e277
        );
    }

    #[test]
    fn fma_underflow() {
        assert_eq!(
            fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
            0.0,
        );
    }
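
    /// Illustrative check (not from the original suite): an exact cancellation produces
    /// `+0.0` under round-to-nearest, via the deferred `x * y + z` path for exact zeros.
    #[test]
    fn fma_exact_zero_sign() {
        assert_biteq!(fma(2.0, 3.0, -6.0), 0.0);
    }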
}