fma.rs source code [crates/libm/src/math/fma.rs]

1	/ SPDX-License-Identifier: MIT /
2	/ origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. /
3
4	use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
5	use super::{CastFrom, CastInto, Float, Int, MinInt};
6
7	/// Fused multiply add (f64)
8	///
9	/// Computes `(xy)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).*
10	#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
11	pub fn fma(x: f64, y: f64, z: f64) -> f64 {
12	select_implementation! {
13	name: fma,
14	use_arch: all(target_arch = "aarch64", target_feature = "neon"),
15	args: x, y, z,
16	}
17
18	fma_round(x, y, z, Round::Nearest).val
19	}
20
21	/// Fused multiply add (f128)
22	///
23	/// Computes `(xy)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).*
24	#[cfg(f128_enabled)]
25	#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
26	pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
27	fma_round(x, y, z, Round::Nearest).val
28	}
29
30	/// Fused multiply-add that works when there is not a larger float size available. Computes
31	/// `(x y) + z`.*
32	#[inline]
33	pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
34	where
35	F: Float,
36	F: CastFrom<F::SignedInt>,
37	F: CastFrom<i8>,
38	F::Int: HInt,
39	u32: CastInto<F::Int>,
40	{
41	let one = IntTy::<F>::ONE;
42	let zero = IntTy::<F>::ZERO;
43
44	// Normalize such that the top of the mantissa is zero and we have a guard bit.
45	let nx = Norm::from_float(x);
46	let ny = Norm::from_float(y);
47	let nz = Norm::from_float(z);
48
49	if nx.is_zero_nan_inf() \|\| ny.is_zero_nan_inf() {
50	// Value will overflow, defer to non-fused operations.
51	return FpResult::ok(x * y + z);
52	}
53
54	if nz.is_zero_nan_inf() {
55	if nz.is_zero() {
56	// Empty add component means we only need to multiply.
57	return FpResult::ok(x * y);
58	}
59	// `z` is NaN or infinity, which sets the result.
60	return FpResult::ok(z);
61	}
62
63	// multiply: r = x y*
64	let zhi: F::Int;
65	let zlo: F::Int;
66	let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
67
68	// Exponent result of multiplication
69	let mut e: i32 = nx.e + ny.e;
70	// Needed shift to align `z` to the multiplication result
71	let mut d: i32 = nz.e - e;
72	let sbits = F::BITS as i32;
73
74	// Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
75	if d > `0` {
76	// The magnitude of `z` is larger than `x y`*
77	if d < sbits {
78	// Maximum shift of one `F::BITS` means shifted `z` will fit into `2 F::BITS`. Shift*
79	// it into `(zhi, zlo)`. No exponent adjustment necessary.
80	zlo = nz.m << d;
81	zhi = nz.m >> (sbits - d);
82	} else {
83	// Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
84	// as a shift by `sbits`).
85	zlo = zero;
86	zhi = nz.m;
87	d -= sbits;
88
89	// `z`'s exponent is large enough that it now needs to be taken into account.
90	e = nz.e - sbits;
91
92	if d == `0` {
93	// Exactly `sbits`, nothing to do
94	} else if d < sbits {
95	// Remaining shift fits within `sbits`. Leave `z` in place, shift `x y`*
96	rlo = (rhi << (sbits - d)) \| (rlo >> d);
97	// Set the sticky bit
98	rlo \|= IntTy::<F>::from((rlo << (sbits - d)) != zero);
99	rhi = rhi >> d;
100	} else {
101	// `z`'s magnitude is enough that `x y` is irrelevant. It was nonzero, so set*
102	// the sticky bit.
103	rlo = one;
104	rhi = zero;
105	}
106	}
107	} else {
108	// `z`'s magnitude once shifted fits entirely within `zlo`
109	zhi = zero;
110	d = -d;
111	if d == `0` {
112	// No shift needed
113	zlo = nz.m;
114	} else if d < sbits {
115	// Shift s.t. `nz.m` fits into `zlo`
116	let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
117	zlo = (nz.m >> d) \| sticky;
118	} else {
119	// Would be entirely shifted out, only set the sticky bit
120	zlo = one;
121	}
122	}
123
124	/ addition /
125
126	let mut neg = nx.neg ^ ny.neg;
127	let samesign: bool = !neg ^ nz.neg;
128	let mut rhi_nonzero = `true`;
129
130	if samesign {
131	// r += z
132	rlo = rlo.wrapping_add(zlo);
133	rhi += zhi + IntTy::<F>::from(rlo < zlo);
134	} else {
135	// r -= z
136	let (res, borrow) = rlo.overflowing_sub(zlo);
137	rlo = res;
138	rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
139	if (rhi >> (F::BITS - `1`)) != zero {
140	rlo = rlo.signed().wrapping_neg().unsigned();
141	rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
142	neg = !neg;
143	}
144	rhi_nonzero = rhi != zero;
145	}
146
147	/ Construct result /
148
149	// Shift result into `rhi`, left-aligned. Last bit is sticky
150	if rhi_nonzero {
151	// `d` > 0, need to shift both `rhi` and `rlo` into result
152	e += sbits;
153	d = rhi.leading_zeros() as i32 - `1`;
154	rhi = (rhi << d) \| (rlo >> (sbits - d));
155	// Update sticky
156	rhi \|= IntTy::<F>::from((rlo << d) != zero);
157	} else if rlo != zero {
158	// `rhi` is zero, `rlo` is the entire result and needs to be shifted
159	d = rlo.leading_zeros() as i32 - `1`;
160	if d < `0` {
161	// Shift and set sticky
162	rhi = (rlo >> `1`) \| (rlo & one);
163	} else {
164	rhi = rlo << d;
165	}
166	} else {
167	// exact +/- 0.0
168	return FpResult::ok(x * y + z);
169	}
170
171	e -= d;
172
173	// Use int->float conversion to populate the significand.
174	// i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1]
175	let mut i: F::SignedInt = rhi.signed();
176
177	if neg {
178	i = -i;
179	}
180
181	// `\|r\|` is in `[0x1p62,0x1p63]` for `f64`
182	let mut r: F = F::cast_from_lossy(i);
183
184	/ Account for subnormal and rounding /
185
186	// Unbiased exponent for the maximum value of `r`
187	let max_pow = F::BITS - `1` + F::EXP_BIAS;
188
189	let mut status = Status::OK;
190
191	if e < -(max_pow as i32 - `2`) {
192	// Result is subnormal before rounding
193	if e == -(max_pow as i32 - `1`) {
194	let mut c = F::from_parts(`false`, max_pow, zero);
195	if neg {
196	c = -c;
197	}
198
199	if r == c {
200	// Min normal after rounding,
201	status.set_underflow(`true`);
202	r = F::MIN_POSITIVE_NORMAL.copysign(r);
203	return FpResult::new(r, status);
204	}
205
206	if (rhi << (F::SIG_BITS + `1`)) != zero {
207	// Account for truncated bits. One bit will be lost in the `scalbn` call, add
208	// another top bit to avoid double rounding if inexact.
209	let iu: F::Int = (rhi >> `1`) \| (rhi & one) \| (one << (F::BITS - `2`));
210	i = iu.signed();
211
212	if neg {
213	i = -i;
214	}
215
216	r = F::cast_from_lossy(i);
217
218	// Remove the top bit
219	r = F::cast_from(`2i8`) * r - c;
220	status.set_underflow(`true`);
221	}
222	} else {
223	// Only round once when scaled
224	d = F::EXP_BITS as i32 - `1`;
225	let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
226	i = (((rhi >> d) \| sticky) << d).signed();
227
228	if neg {
229	i = -i;
230	}
231
232	r = F::cast_from_lossy(i);
233	}
234	}
235
236	// Use our exponent to scale the final value.
237	FpResult::new(super::generic::scalbn(r, e), status)
238	}
239
240	/// Representation of `F` that has handled subnormals.
241	#[derive(Clone, Copy, Debug)]
242	struct Norm<F: Float> {
243	/// Normalized significand with one guard bit, unsigned.
244	m: F::Int,
245	/// Exponent of the mantissa such that `m 2^e = x`. Accounts for the shift in the mantissa*
246	/// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`.
247	e: i32,
248	neg: bool,
249	}
250
251	impl<F: Float> Norm<F> {
252	/// Unbias the exponent and account for the mantissa's precision, including the guard bit.
253	const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + `1`;
254
255	/// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we
256	/// adjusted the exponent such that it exceeds this threashold.
257	const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
258
259	fn from_float(x: F) -> Self {
260	let mut ix = x.to_bits();
261	let mut e = x.ex() as i32;
262	let neg = x.is_sign_negative();
263	if e == `0` {
264	// Normalize subnormals by multiplication
265	let scale_i = F::BITS - `1`;
266	let scale_f = F::from_parts(`false`, scale_i + F::EXP_BIAS, F::Int::ZERO);
267	let scaled = x * scale_f;
268	ix = scaled.to_bits();
269	e = scaled.ex() as i32;
270	e = if e == `0` {
271	// If the exponent is still zero, the input was zero. Artifically set this value
272	// such that the final `e` will exceed `ZERO_INF_NAN`.
273	`1` << F::EXP_BITS
274	} else {
275	// Otherwise, account for the scaling we just did.
276	e - scale_i as i32
277	};
278	}
279
280	e -= Self::EXP_UNBIAS as i32;
281
282	// Absolute value, set the implicit bit, and shift to create a guard bit
283	ix &= F::SIG_MASK;
284	ix \|= F::IMPLICIT_BIT;
285	ix <<= `1`;
286
287	Self { m: ix, e, neg }
288	}
289
290	/// True if the value was zero, infinity, or NaN.
291	fn is_zero_nan_inf(self) -> bool {
292	self.e >= Self::ZERO_INF_NAN as i32
293	}
294
295	/// The only value we have
296	fn is_zero(self) -> bool {
297	// The only exponent that strictly exceeds this value is our sentinel value for zero.
298	self.e > Self::ZERO_INF_NAN as i32
299	}
300	}
301
302	#[cfg(test)]
303	mod tests {
304	use super::*;
305
306	/// Test the generic `fma_round` algorithm for a given float.
307	fn spec_test<F>()
308	where
309	F: Float,
310	F: CastFrom<F::SignedInt>,
311	F: CastFrom<i8>,
312	F::Int: HInt,
313	u32: CastInto<F::Int>,
314	{
315	let x = F::from_bits(F::Int::ONE);
316	let y = F::from_bits(F::Int::ONE);
317	let z = F::ZERO;
318
319	let fma = \|x, y, z\| fma_round(x, y, z, Round::Nearest).val;
320
321	// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
322	// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
323	// exact result"
324	assert_biteq!(fma(x, y, z), F::ZERO);
325	assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
326	assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
327	assert_biteq!(fma(-x, -y, z), F::ZERO);
328	}
329
330	#[test]
331	fn spec_test_f32() {
332	spec_test::<f32>();
333	}
334
335	#[test]
336	fn spec_test_f64() {
337	spec_test::<f64>();
338
339	let expect_underflow = [
340	(
341	hf64!("0x1.0p-1070"),
342	hf64!("0x1.0p-1070"),
343	hf64!("0x1.ffffffffffffp-1023"),
344	hf64!("0x0.ffffffffffff8p-1022"),
345	),
346	(
347	// FIXME: we raise underflow but this should only be inexact (based on C and
348	// `rustc_apfloat`).
349	hf64!("0x1.0p-1070"),
350	hf64!("0x1.0p-1070"),
351	hf64!("-0x1.0p-1022"),
352	hf64!("-0x1.0p-1022"),
353	),
354	];
355
356	for (x, y, z, res) in expect_underflow {
357	let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
358	assert_biteq!(val, res);
359	assert_eq!(status, Status::UNDERFLOW);
360	}
361	}
362
363	#[test]
364	#[cfg(f128_enabled)]
365	fn spec_test_f128() {
366	spec_test::<f128>();
367	}
368
369	#[test]
370	fn fma_segfault() {
371	// These two inputs cause fma to segfault on release due to overflow:
372	assert_eq!(
373	fma(
374	-`0.0000000000000002220446049250313`,
375	-`0.0000000000000002220446049250313`,
376	-`0.0000000000000002220446049250313`
377	),
378	-`0.00000000000000022204460492503126`,
379	);
380
381	let result = fma(`-0.992`, `-0.992`, `-0.992`);
382	//force rounding to storage format on x87 to prevent superious errors.
383	#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
384	let result = force_eval!(result);
385	assert_eq!(result, -`0.007936000000000007`,);
386	}
387
388	#[test]
389	fn fma_sbb() {
390	assert_eq!(
391	fma(-(`1.0` - f64::EPSILON), f64::MIN, f64::MIN),
392	-`3991680619069439e277`
393	);
394	}
395
396	#[test]
397	fn fma_underflow() {
398	assert_eq!(
399	fma(`1.1102230246251565e-16`, -`9.812526705433188e-305`, `1.0894e-320`),
400	`0.0`,
401	);
402	}
403	}
404