1 | /* SPDX-License-Identifier: MIT */ |
2 | /* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ |
3 | |
4 | use super::support::{FpResult, IntTy, Round, Status}; |
5 | use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; |
6 | |
// Stub whose only purpose is to let the `Float` trait expose an `fmaf16` entry point.
// There is no real implementation behind it: calling it panics via `unimplemented!()`.
#[allow(unused)] // nothing in this file calls the stub
#[cfg(f16_enabled)]
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
    unimplemented!()
}
14 | |
15 | /// Floating multiply add (f32) |
16 | /// |
17 | /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). |
18 | #[cfg_attr (all(test, assert_no_panic), no_panic::no_panic)] |
19 | pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { |
20 | select_implementation! { |
21 | name: fmaf, |
22 | use_arch: all(target_arch = "aarch64" , target_feature = "neon" ), |
23 | args: x, y, z, |
24 | } |
25 | |
26 | fma_wide_round(x, y, z, Round::Nearest).val |
27 | } |
28 | |
29 | /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, |
30 | /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. |
31 | #[inline ] |
32 | pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F> |
33 | where |
34 | F: Float + HFloat<D = B>, |
35 | B: Float + DFloat<H = F>, |
36 | B::Int: CastInto<i32>, |
37 | i32: CastFrom<i32>, |
38 | { |
39 | let one = IntTy::<B>::ONE; |
40 | |
41 | let xy: B = x.widen() * y.widen(); |
42 | let mut result: B = xy + z.widen(); |
43 | let mut ui: B::Int = result.to_bits(); |
44 | let re = result.ex(); |
45 | let zb: B = z.widen(); |
46 | |
47 | let prec_diff = B::SIG_BITS - F::SIG_BITS; |
48 | let excess_prec = ui & ((one << prec_diff) - one); |
49 | let halfway = one << (prec_diff - 1); |
50 | |
51 | // Common case: the larger precision is fine if... |
52 | // This is not a halfway case |
53 | if excess_prec != halfway |
54 | // Or the result is NaN |
55 | || re == B::EXP_SAT |
56 | // Or the result is exact |
57 | || (result - xy == zb && result - zb == xy) |
58 | // Or the mode is something other than round to nearest |
59 | || round != Round::Nearest |
60 | { |
61 | let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32; |
62 | let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32; |
63 | |
64 | let mut status = Status::OK; |
65 | |
66 | if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() { |
67 | // This branch is never hit; requires previous operations to set a status |
68 | status.set_inexact(false); |
69 | |
70 | result = xy + z.widen(); |
71 | if status.inexact() { |
72 | status.set_underflow(true); |
73 | } else { |
74 | status.set_inexact(true); |
75 | } |
76 | } |
77 | |
78 | return FpResult { |
79 | val: result.narrow(), |
80 | status, |
81 | }; |
82 | } |
83 | |
84 | let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO; |
85 | let err = if neg == (zb > xy) { |
86 | xy - result + zb |
87 | } else { |
88 | zb - result + xy |
89 | }; |
90 | if neg == (err < B::ZERO) { |
91 | ui += one; |
92 | } else { |
93 | ui -= one; |
94 | } |
95 | |
96 | FpResult::ok(B::from_bits(ui).narrow()) |
97 | } |
98 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression test named after issue 263: a fixed triple of raw `f32` bit patterns must
    /// produce one specific result bit pattern.
    #[test]
    fn issue_263() {
        let x = f32::from_bits(1_266_679_807);
        let y = f32::from_bits(1_300_234_242);
        let z = f32::from_bits(1_115_553_792);

        let want = f32::from_bits(1_501_560_833);
        let got = fmaf(x, y, z);

        assert_eq!(got, want);
    }
}
112 | |