fma.rs source code [crates/core_arch/src/x86/fma.rs]

//! Fused Multiply-Add instruction set (FMA)
//!
//! The FMA instruction set is an extension to the 128- and 256-bit SSE
//! instructions in the x86 microprocessor instruction set that performs fused
//! multiply–add (FMA) operations.
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the
//! instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
20
21	use crate::core_arch::x86::*;
22	use crate::intrinsics::simd::{simd_fma, simd_neg};
23	use crate::intrinsics::{fmaf32, fmaf64};
24
25	#[cfg(test)]
26	use stdarch_test::assert_instr;
27
28	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
29	/// and `b`, and add the intermediate result to packed elements in `c`.
30	///
31	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_pd)
32	#[inline]
33	#[target_feature(enable = "fma")]
34	#[cfg_attr(test, assert_instr(vfmadd))]
35	#[stable(feature = "simd_x86", since = "1.27.0")]
36	pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37	unsafe { simd_fma(x:a, y:b, z:c) }
38	}
39
40	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
41	/// and `b`, and add the intermediate result to packed elements in `c`.
42	///
43	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_pd)
44	#[inline]
45	#[target_feature(enable = "fma")]
46	#[cfg_attr(test, assert_instr(vfmadd))]
47	#[stable(feature = "simd_x86", since = "1.27.0")]
48	pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
49	unsafe { simd_fma(x:a, y:b, z:c) }
50	}
51
52	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
53	/// and `b`, and add the intermediate result to packed elements in `c`.
54	///
55	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ps)
56	#[inline]
57	#[target_feature(enable = "fma")]
58	#[cfg_attr(test, assert_instr(vfmadd))]
59	#[stable(feature = "simd_x86", since = "1.27.0")]
60	pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
61	unsafe { simd_fma(x:a, y:b, z:c) }
62	}
63
64	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
65	/// and `b`, and add the intermediate result to packed elements in `c`.
66	///
67	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_ps)
68	#[inline]
69	#[target_feature(enable = "fma")]
70	#[cfg_attr(test, assert_instr(vfmadd))]
71	#[stable(feature = "simd_x86", since = "1.27.0")]
72	pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
73	unsafe { simd_fma(x:a, y:b, z:c) }
74	}
75
76	/// Multiplies the lower double-precision (64-bit) floating-point elements in
77	/// `a` and `b`, and add the intermediate result to the lower element in `c`.
78	/// Stores the result in the lower element of the returned value, and copy the
79	/// upper element from `a` to the upper elements of the result.
80	///
81	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_sd)
82	#[inline]
83	#[target_feature(enable = "fma")]
84	#[cfg_attr(test, assert_instr(vfmadd))]
85	#[stable(feature = "simd_x86", since = "1.27.0")]
86	pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
87	unsafe {
88	simd_insert!(
89	a,
90	`0`,
91	fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
92	)
93	}
94	}
95
96	/// Multiplies the lower single-precision (32-bit) floating-point elements in
97	/// `a` and `b`, and add the intermediate result to the lower element in `c`.
98	/// Stores the result in the lower element of the returned value, and copy the
99	/// 3 upper elements from `a` to the upper elements of the result.
100	///
101	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ss)
102	#[inline]
103	#[target_feature(enable = "fma")]
104	#[cfg_attr(test, assert_instr(vfmadd))]
105	#[stable(feature = "simd_x86", since = "1.27.0")]
106	pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
107	unsafe {
108	simd_insert!(
109	a,
110	`0`,
111	fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c))
112	)
113	}
114	}
115
116	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
117	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
118	/// the intermediate result.
119	///
120	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd)
121	#[inline]
122	#[target_feature(enable = "fma")]
123	#[cfg_attr(test, assert_instr(vfmaddsub))]
124	#[stable(feature = "simd_x86", since = "1.27.0")]
125	pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
126	unsafe {
127	let add: __m128d = simd_fma(x:a, y:b, z:c);
128	let sub: __m128d = simd_fma(x:a, y:b, z:simd_neg(c));
129	simd_shuffle!(add, sub, [`2`, `1`])
130	}
131	}
132
133	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
134	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
135	/// the intermediate result.
136	///
137	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd)
138	#[inline]
139	#[target_feature(enable = "fma")]
140	#[cfg_attr(test, assert_instr(vfmaddsub))]
141	#[stable(feature = "simd_x86", since = "1.27.0")]
142	pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
143	unsafe {
144	let add: __m256d = simd_fma(x:a, y:b, z:c);
145	let sub: __m256d = simd_fma(x:a, y:b, z:simd_neg(c));
146	simd_shuffle!(add, sub, [`4`, `1`, `6`, `3`])
147	}
148	}
149
150	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
151	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
152	/// the intermediate result.
153	///
154	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps)
155	#[inline]
156	#[target_feature(enable = "fma")]
157	#[cfg_attr(test, assert_instr(vfmaddsub))]
158	#[stable(feature = "simd_x86", since = "1.27.0")]
159	pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
160	unsafe {
161	let add: __m128 = simd_fma(x:a, y:b, z:c);
162	let sub: __m128 = simd_fma(x:a, y:b, z:simd_neg(c));
163	simd_shuffle!(add, sub, [`4`, `1`, `6`, `3`])
164	}
165	}
166
167	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
168	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
169	/// the intermediate result.
170	///
171	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps)
172	#[inline]
173	#[target_feature(enable = "fma")]
174	#[cfg_attr(test, assert_instr(vfmaddsub))]
175	#[stable(feature = "simd_x86", since = "1.27.0")]
176	pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
177	unsafe {
178	let add: __m256 = simd_fma(x:a, y:b, z:c);
179	let sub: __m256 = simd_fma(x:a, y:b, z:simd_neg(c));
180	simd_shuffle!(add, sub, [`8`, `1`, `10`, `3`, `12`, `5`, `14`, `7`])
181	}
182	}
183
184	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
185	/// and `b`, and subtract packed elements in `c` from the intermediate result.
186	///
187	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_pd)
188	#[inline]
189	#[target_feature(enable = "fma")]
190	#[cfg_attr(test, assert_instr(vfmsub))]
191	#[stable(feature = "simd_x86", since = "1.27.0")]
192	pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
193	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
194	}
195
196	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
197	/// and `b`, and subtract packed elements in `c` from the intermediate result.
198	///
199	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_pd)
200	#[inline]
201	#[target_feature(enable = "fma")]
202	#[cfg_attr(test, assert_instr(vfmsub))]
203	#[stable(feature = "simd_x86", since = "1.27.0")]
204	pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
205	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
206	}
207
208	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
209	/// and `b`, and subtract packed elements in `c` from the intermediate result.
210	///
211	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ps)
212	#[inline]
213	#[target_feature(enable = "fma")]
214	#[cfg_attr(test, assert_instr(vfmsub213ps))]
215	#[stable(feature = "simd_x86", since = "1.27.0")]
216	pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
217	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
218	}
219
220	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
221	/// and `b`, and subtract packed elements in `c` from the intermediate result.
222	///
223	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_ps)
224	#[inline]
225	#[target_feature(enable = "fma")]
226	#[cfg_attr(test, assert_instr(vfmsub213ps))]
227	#[stable(feature = "simd_x86", since = "1.27.0")]
228	pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
229	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
230	}
231
232	/// Multiplies the lower double-precision (64-bit) floating-point elements in
233	/// `a` and `b`, and subtract the lower element in `c` from the intermediate
234	/// result. Store the result in the lower element of the returned value, and
235	/// copy the upper element from `a` to the upper elements of the result.
236	///
237	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_sd)
238	#[inline]
239	#[target_feature(enable = "fma")]
240	#[cfg_attr(test, assert_instr(vfmsub))]
241	#[stable(feature = "simd_x86", since = "1.27.0")]
242	pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
243	unsafe {
244	simd_insert!(
245	a,
246	`0`,
247	fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
248	)
249	}
250	}
251
252	/// Multiplies the lower single-precision (32-bit) floating-point elements in
253	/// `a` and `b`, and subtract the lower element in `c` from the intermediate
254	/// result. Store the result in the lower element of the returned value, and
255	/// copy the 3 upper elements from `a` to the upper elements of the result.
256	///
257	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ss)
258	#[inline]
259	#[target_feature(enable = "fma")]
260	#[cfg_attr(test, assert_instr(vfmsub))]
261	#[stable(feature = "simd_x86", since = "1.27.0")]
262	pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
263	unsafe {
264	simd_insert!(
265	a,
266	`0`,
267	fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c))
268	)
269	}
270	}
271
272	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
273	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
274	/// the intermediate result.
275	///
276	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd)
277	#[inline]
278	#[target_feature(enable = "fma")]
279	#[cfg_attr(test, assert_instr(vfmsubadd))]
280	#[stable(feature = "simd_x86", since = "1.27.0")]
281	pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
282	unsafe {
283	let add: __m128d = simd_fma(x:a, y:b, z:c);
284	let sub: __m128d = simd_fma(x:a, y:b, z:simd_neg(c));
285	simd_shuffle!(add, sub, [`0`, `3`])
286	}
287	}
288
289	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
290	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
291	/// the intermediate result.
292	///
293	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd)
294	#[inline]
295	#[target_feature(enable = "fma")]
296	#[cfg_attr(test, assert_instr(vfmsubadd))]
297	#[stable(feature = "simd_x86", since = "1.27.0")]
298	pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
299	unsafe {
300	let add: __m256d = simd_fma(x:a, y:b, z:c);
301	let sub: __m256d = simd_fma(x:a, y:b, z:simd_neg(c));
302	simd_shuffle!(add, sub, [`0`, `5`, `2`, `7`])
303	}
304	}
305
306	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
307	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
308	/// the intermediate result.
309	///
310	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps)
311	#[inline]
312	#[target_feature(enable = "fma")]
313	#[cfg_attr(test, assert_instr(vfmsubadd))]
314	#[stable(feature = "simd_x86", since = "1.27.0")]
315	pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
316	unsafe {
317	let add: __m128 = simd_fma(x:a, y:b, z:c);
318	let sub: __m128 = simd_fma(x:a, y:b, z:simd_neg(c));
319	simd_shuffle!(add, sub, [`0`, `5`, `2`, `7`])
320	}
321	}
322
323	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
324	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
325	/// the intermediate result.
326	///
327	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps)
328	#[inline]
329	#[target_feature(enable = "fma")]
330	#[cfg_attr(test, assert_instr(vfmsubadd))]
331	#[stable(feature = "simd_x86", since = "1.27.0")]
332	pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
333	unsafe {
334	let add: __m256 = simd_fma(x:a, y:b, z:c);
335	let sub: __m256 = simd_fma(x:a, y:b, z:simd_neg(c));
336	simd_shuffle!(add, sub, [`0`, `9`, `2`, `11`, `4`, `13`, `6`, `15`])
337	}
338	}
339
340	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
341	/// and `b`, and add the negated intermediate result to packed elements in `c`.
342	///
343	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd)
344	#[inline]
345	#[target_feature(enable = "fma")]
346	#[cfg_attr(test, assert_instr(vfnmadd))]
347	#[stable(feature = "simd_x86", since = "1.27.0")]
348	pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
349	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
350	}
351
352	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
353	/// and `b`, and add the negated intermediate result to packed elements in `c`.
354	///
355	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd)
356	#[inline]
357	#[target_feature(enable = "fma")]
358	#[cfg_attr(test, assert_instr(vfnmadd))]
359	#[stable(feature = "simd_x86", since = "1.27.0")]
360	pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
361	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
362	}
363
364	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
365	/// and `b`, and add the negated intermediate result to packed elements in `c`.
366	///
367	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps)
368	#[inline]
369	#[target_feature(enable = "fma")]
370	#[cfg_attr(test, assert_instr(vfnmadd))]
371	#[stable(feature = "simd_x86", since = "1.27.0")]
372	pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
373	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
374	}
375
376	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
377	/// and `b`, and add the negated intermediate result to packed elements in `c`.
378	///
379	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps)
380	#[inline]
381	#[target_feature(enable = "fma")]
382	#[cfg_attr(test, assert_instr(vfnmadd))]
383	#[stable(feature = "simd_x86", since = "1.27.0")]
384	pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
385	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
386	}
387
388	/// Multiplies the lower double-precision (64-bit) floating-point elements in
389	/// `a` and `b`, and add the negated intermediate result to the lower element
390	/// in `c`. Store the result in the lower element of the returned value, and
391	/// copy the upper element from `a` to the upper elements of the result.
392	///
393	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_sd)
394	#[inline]
395	#[target_feature(enable = "fma")]
396	#[cfg_attr(test, assert_instr(vfnmadd))]
397	#[stable(feature = "simd_x86", since = "1.27.0")]
398	pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
399	unsafe {
400	simd_insert!(
401	a,
402	`0`,
403	fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c))
404	)
405	}
406	}
407
408	/// Multiplies the lower single-precision (32-bit) floating-point elements in
409	/// `a` and `b`, and add the negated intermediate result to the lower element
410	/// in `c`. Store the result in the lower element of the returned value, and
411	/// copy the 3 upper elements from `a` to the upper elements of the result.
412	///
413	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ss)
414	#[inline]
415	#[target_feature(enable = "fma")]
416	#[cfg_attr(test, assert_instr(vfnmadd))]
417	#[stable(feature = "simd_x86", since = "1.27.0")]
418	pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
419	unsafe {
420	simd_insert!(
421	a,
422	`0`,
423	fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c))
424	)
425	}
426	}
427
428	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
429	/// and `b`, and subtract packed elements in `c` from the negated intermediate
430	/// result.
431	///
432	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_pd)
433	#[inline]
434	#[target_feature(enable = "fma")]
435	#[cfg_attr(test, assert_instr(vfnmsub))]
436	#[stable(feature = "simd_x86", since = "1.27.0")]
437	pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
438	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
439	}
440
441	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
442	/// and `b`, and subtract packed elements in `c` from the negated intermediate
443	/// result.
444	///
445	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_pd)
446	#[inline]
447	#[target_feature(enable = "fma")]
448	#[cfg_attr(test, assert_instr(vfnmsub))]
449	#[stable(feature = "simd_x86", since = "1.27.0")]
450	pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
451	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
452	}
453
454	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
455	/// and `b`, and subtract packed elements in `c` from the negated intermediate
456	/// result.
457	///
458	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ps)
459	#[inline]
460	#[target_feature(enable = "fma")]
461	#[cfg_attr(test, assert_instr(vfnmsub))]
462	#[stable(feature = "simd_x86", since = "1.27.0")]
463	pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
464	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
465	}
466
467	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
468	/// and `b`, and subtract packed elements in `c` from the negated intermediate
469	/// result.
470	///
471	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_ps)
472	#[inline]
473	#[target_feature(enable = "fma")]
474	#[cfg_attr(test, assert_instr(vfnmsub))]
475	#[stable(feature = "simd_x86", since = "1.27.0")]
476	pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
477	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
478	}
479
480	/// Multiplies the lower double-precision (64-bit) floating-point elements in
481	/// `a` and `b`, and subtract packed elements in `c` from the negated
482	/// intermediate result. Store the result in the lower element of the returned
483	/// value, and copy the upper element from `a` to the upper elements of the
484	/// result.
485	///
486	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_sd)
487	#[inline]
488	#[target_feature(enable = "fma")]
489	#[cfg_attr(test, assert_instr(vfnmsub))]
490	#[stable(feature = "simd_x86", since = "1.27.0")]
491	pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
492	unsafe {
493	simd_insert!(
494	a,
495	`0`,
496	fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c))
497	)
498	}
499	}
500
501	/// Multiplies the lower single-precision (32-bit) floating-point elements in
502	/// `a` and `b`, and subtract packed elements in `c` from the negated
503	/// intermediate result. Store the result in the lower element of the
504	/// returned value, and copy the 3 upper elements from `a` to the upper
505	/// elements of the result.
506	///
507	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ss)
508	#[inline]
509	#[target_feature(enable = "fma")]
510	#[cfg_attr(test, assert_instr(vfnmsub))]
511	#[stable(feature = "simd_x86", since = "1.27.0")]
512	pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
513	unsafe {
514	simd_insert!(
515	a,
516	`0`,
517	fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c))
518	)
519	}
520	}
521
#[cfg(test)]
mod tests {
    // Each test feeds small exactly-representable inputs through one intrinsic
    // and compares every lane against a hand-computed expected vector `r`.

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // fmadd: every lane is `a * b + c`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(9., 15.);
        assert_eq_m128d(_mm_fmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(9., 15., 22., 15.);
        assert_eq_m256d(_mm256_fmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(9., 15., 22., 15.);
        assert_eq_m128(_mm_fmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(9., 15., 22., 15., -5., -49., -2., -31.);
        assert_eq_m256(_mm256_fmadd_ps(a, b, c), r);
    }

    // Scalar variants: only lane 0 is computed, upper lanes are copied from `a`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(9., 2.);
        assert_eq_m128d(_mm_fmadd_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(9., 2., 3., 4.);
        assert_eq_m128(_mm_fmadd_ss(a, b, c), r);
    }

    // fmaddsub: even lanes are `a * b - c`, odd lanes are `a * b + c`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmaddsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(1., 15.);
        assert_eq_m128d(_mm_fmaddsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmaddsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(1., 15., 20., 15.);
        assert_eq_m256d(_mm256_fmaddsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmaddsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(1., 15., 20., 15.);
        assert_eq_m128(_mm_fmaddsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmaddsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(1., 15., 20., 15., 5., -49., 2., -31.);
        assert_eq_m256(_mm256_fmaddsub_ps(a, b, c), r);
    }

    // fmsub: every lane is `a * b - c`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(1., -3.);
        assert_eq_m128d(_mm_fmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(1., -3., 20., 1.);
        assert_eq_m256d(_mm256_fmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(1., -3., 20., 1.);
        assert_eq_m128(_mm_fmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(1., -3., 20., 1., 5., -71., 2., -25.);
        assert_eq_m256(_mm256_fmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(1., 2.);
        assert_eq_m128d(_mm_fmsub_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(1., 2., 3., 4.);
        assert_eq_m128(_mm_fmsub_ss(a, b, c), r);
    }

    // fmsubadd: even lanes are `a * b + c`, odd lanes are `a * b - c`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsubadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(9., -3.);
        assert_eq_m128d(_mm_fmsubadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsubadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(9., -3., 22., 1.);
        assert_eq_m256d(_mm256_fmsubadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsubadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(9., -3., 22., 1.);
        assert_eq_m128(_mm_fmsubadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsubadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(9., -3., 22., 1., -5., -71., -2., -25.);
        assert_eq_m256(_mm256_fmsubadd_ps(a, b, c), r);
    }

    // fnmadd: every lane is `-(a * b) + c`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-1., 3.);
        assert_eq_m128d(_mm_fnmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(-1., 3., -20., -1.);
        assert_eq_m256d(_mm256_fnmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-1., 3., -20., -1.);
        assert_eq_m128(_mm_fnmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(-1., 3., -20., -1., -5., 71., -2., 25.);
        assert_eq_m256(_mm256_fnmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-1., 2.);
        assert_eq_m128d(_mm_fnmadd_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-1., 2., 3., 4.);
        assert_eq_m128(_mm_fnmadd_ss(a, b, c), r);
    }

    // fnmsub: every lane is `-(a * b) - c`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-9., -15.);
        assert_eq_m128d(_mm_fnmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(-9., -15., -22., -15.);
        assert_eq_m256d(_mm256_fnmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-9., -15., -22., -15.);
        assert_eq_m128(_mm_fnmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(-9., -15., -22., -15., 5., 49., 2., 31.);
        assert_eq_m256(_mm256_fnmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-9., 2.);
        assert_eq_m128d(_mm_fnmsub_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-9., 2., 3., 4.);
        assert_eq_m128(_mm_fnmsub_ss(a, b, c), r);
    }
}
817