| 1 | /* SPDX-License-Identifier: MIT */ |
| 2 | /* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ |
| 3 | |
| 4 | use super::support::{FpResult, IntTy, Round, Status}; |
| 5 | use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; |
| 6 | |
| 7 | // Placeholder so we can have `fmaf16` in the `Float` trait. |
| 8 | #[allow (unused)] |
| 9 | #[cfg (f16_enabled)] |
| 10 | #[cfg_attr (all(test, assert_no_panic), no_panic::no_panic)] |
| 11 | pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { |
| 12 | unimplemented!() |
| 13 | } |
| 14 | |
| 15 | /// Floating multiply add (f32) |
| 16 | /// |
| 17 | /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). |
| 18 | #[cfg_attr (all(test, assert_no_panic), no_panic::no_panic)] |
| 19 | pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { |
| 20 | select_implementation! { |
| 21 | name: fmaf, |
| 22 | use_arch: all(target_arch = "aarch64" , target_feature = "neon" ), |
| 23 | args: x, y, z, |
| 24 | } |
| 25 | |
| 26 | fma_wide_round(x, y, z, Round::Nearest).val |
| 27 | } |
| 28 | |
| 29 | /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, |
| 30 | /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. |
| 31 | #[inline ] |
| 32 | pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F> |
| 33 | where |
| 34 | F: Float + HFloat<D = B>, |
| 35 | B: Float + DFloat<H = F>, |
| 36 | B::Int: CastInto<i32>, |
| 37 | i32: CastFrom<i32>, |
| 38 | { |
| 39 | let one = IntTy::<B>::ONE; |
| 40 | |
| 41 | let xy: B = x.widen() * y.widen(); |
| 42 | let mut result: B = xy + z.widen(); |
| 43 | let mut ui: B::Int = result.to_bits(); |
| 44 | let re = result.ex(); |
| 45 | let zb: B = z.widen(); |
| 46 | |
| 47 | let prec_diff = B::SIG_BITS - F::SIG_BITS; |
| 48 | let excess_prec = ui & ((one << prec_diff) - one); |
| 49 | let halfway = one << (prec_diff - 1); |
| 50 | |
| 51 | // Common case: the larger precision is fine if... |
| 52 | // This is not a halfway case |
| 53 | if excess_prec != halfway |
| 54 | // Or the result is NaN |
| 55 | || re == B::EXP_SAT |
| 56 | // Or the result is exact |
| 57 | || (result - xy == zb && result - zb == xy) |
| 58 | // Or the mode is something other than round to nearest |
| 59 | || round != Round::Nearest |
| 60 | { |
| 61 | let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; |
| 62 | let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; |
| 63 | |
| 64 | let mut status = Status::OK; |
| 65 | |
| 66 | if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { |
| 67 | // This branch is never hit; requires previous operations to set a status |
| 68 | status.set_inexact(false); |
| 69 | |
| 70 | result = xy + z.widen(); |
| 71 | if status.inexact() { |
| 72 | status.set_underflow(true); |
| 73 | } else { |
| 74 | status.set_inexact(true); |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | return FpResult { |
| 79 | val: result.narrow(), |
| 80 | status, |
| 81 | }; |
| 82 | } |
| 83 | |
| 84 | let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO; |
| 85 | let err = if neg == (zb > xy) { |
| 86 | xy - result + zb |
| 87 | } else { |
| 88 | zb - result + xy |
| 89 | }; |
| 90 | if neg == (err < B::ZERO) { |
| 91 | ui += one; |
| 92 | } else { |
| 93 | ui -= one; |
| 94 | } |
| 95 | |
| 96 | FpResult::ok(B::from_bits(ui).narrow()) |
| 97 | } |
| 98 | |
| 99 | #[cfg (test)] |
| 100 | mod tests { |
| 101 | use super::*; |
| 102 | |
| 103 | #[test ] |
| 104 | fn issue_263() { |
| 105 | let a = f32::from_bits(1266679807); |
| 106 | let b = f32::from_bits(1300234242); |
| 107 | let c = f32::from_bits(1115553792); |
| 108 | let expected = f32::from_bits(1501560833); |
| 109 | assert_eq!(fmaf(a, b, c), expected); |
| 110 | } |
| 111 | } |
| 112 | |