fma.rs source code [crates/core_arch/src/x86/fma.rs]

1	//! Fused Multiply-Add instruction set (FMA)
2	//!
3	//! The FMA instruction set is an extension to the 128 and 256-bit SSE
4	//! instructions in the x86 microprocessor instruction set to perform fused
5	//! multiply–add (FMA) operations.
6	//!
7	//! The references are:
8	//!
9	//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
10	//! Instruction Set Reference, A-Z][intel64_ref].
11	//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
12	//! System Instructions][amd64_ref].
13	//!
14	//! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the
15	//! instructions available.
16	//!
17	//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
18	//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
19	//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
20
21	use crate::core_arch::x86::*;
22	use crate::intrinsics::simd::simd_fma;
23
24	#[cfg(test)]
25	use stdarch_test::assert_instr;
26
27	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
28	/// and `b`, and add the intermediate result to packed elements in `c`.
29	///
30	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_pd)
31	#[inline]
32	#[target_feature(enable = "fma")]
33	#[cfg_attr(test, assert_instr(vfmadd))]
34	#[stable(feature = "simd_x86", since = "1.27.0")]
35	pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36	simd_fma(x:a, y:b, z:c)
37	}
38
39	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
40	/// and `b`, and add the intermediate result to packed elements in `c`.
41	///
42	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_pd)
43	#[inline]
44	#[target_feature(enable = "fma")]
45	#[cfg_attr(test, assert_instr(vfmadd))]
46	#[stable(feature = "simd_x86", since = "1.27.0")]
47	pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
48	simd_fma(x:a, y:b, z:c)
49	}
50
51	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
52	/// and `b`, and add the intermediate result to packed elements in `c`.
53	///
54	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ps)
55	#[inline]
56	#[target_feature(enable = "fma")]
57	#[cfg_attr(test, assert_instr(vfmadd))]
58	#[stable(feature = "simd_x86", since = "1.27.0")]
59	pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
60	simd_fma(x:a, y:b, z:c)
61	}
62
63	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
64	/// and `b`, and add the intermediate result to packed elements in `c`.
65	///
66	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_ps)
67	#[inline]
68	#[target_feature(enable = "fma")]
69	#[cfg_attr(test, assert_instr(vfmadd))]
70	#[stable(feature = "simd_x86", since = "1.27.0")]
71	pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
72	simd_fma(x:a, y:b, z:c)
73	}
74
75	/// Multiplies the lower double-precision (64-bit) floating-point elements in
76	/// `a` and `b`, and add the intermediate result to the lower element in `c`.
77	/// Stores the result in the lower element of the returned value, and copy the
78	/// upper element from `a` to the upper elements of the result.
79	///
80	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_sd)
81	#[inline]
82	#[target_feature(enable = "fma")]
83	#[cfg_attr(test, assert_instr(vfmadd))]
84	#[stable(feature = "simd_x86", since = "1.27.0")]
85	pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
86	vfmaddsd(a, b, c)
87	}
88
89	/// Multiplies the lower single-precision (32-bit) floating-point elements in
90	/// `a` and `b`, and add the intermediate result to the lower element in `c`.
91	/// Stores the result in the lower element of the returned value, and copy the
92	/// 3 upper elements from `a` to the upper elements of the result.
93	///
94	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ss)
95	#[inline]
96	#[target_feature(enable = "fma")]
97	#[cfg_attr(test, assert_instr(vfmadd))]
98	#[stable(feature = "simd_x86", since = "1.27.0")]
99	pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
100	vfmaddss(a, b, c)
101	}
102
103	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
104	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
105	/// the intermediate result.
106	///
107	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd)
108	#[inline]
109	#[target_feature(enable = "fma")]
110	#[cfg_attr(test, assert_instr(vfmaddsub))]
111	#[stable(feature = "simd_x86", since = "1.27.0")]
112	pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
113	vfmaddsubpd(a, b, c)
114	}
115
116	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
117	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
118	/// the intermediate result.
119	///
120	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd)
121	#[inline]
122	#[target_feature(enable = "fma")]
123	#[cfg_attr(test, assert_instr(vfmaddsub))]
124	#[stable(feature = "simd_x86", since = "1.27.0")]
125	pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
126	vfmaddsubpd256(a, b, c)
127	}
128
129	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
130	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
131	/// the intermediate result.
132	///
133	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps)
134	#[inline]
135	#[target_feature(enable = "fma")]
136	#[cfg_attr(test, assert_instr(vfmaddsub))]
137	#[stable(feature = "simd_x86", since = "1.27.0")]
138	pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
139	vfmaddsubps(a, b, c)
140	}
141
142	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
143	/// and `b`, and alternatively add and subtract packed elements in `c` to/from
144	/// the intermediate result.
145	///
146	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps)
147	#[inline]
148	#[target_feature(enable = "fma")]
149	#[cfg_attr(test, assert_instr(vfmaddsub))]
150	#[stable(feature = "simd_x86", since = "1.27.0")]
151	pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
152	vfmaddsubps256(a, b, c)
153	}
154
155	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
156	/// and `b`, and subtract packed elements in `c` from the intermediate result.
157	///
158	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_pd)
159	#[inline]
160	#[target_feature(enable = "fma")]
161	#[cfg_attr(test, assert_instr(vfmsub))]
162	#[stable(feature = "simd_x86", since = "1.27.0")]
163	pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
164	vfmsubpd(a, b, c)
165	}
166
167	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
168	/// and `b`, and subtract packed elements in `c` from the intermediate result.
169	///
170	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_pd)
171	#[inline]
172	#[target_feature(enable = "fma")]
173	#[cfg_attr(test, assert_instr(vfmsub))]
174	#[stable(feature = "simd_x86", since = "1.27.0")]
175	pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
176	vfmsubpd256(a, b, c)
177	}
178
179	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
180	/// and `b`, and subtract packed elements in `c` from the intermediate result.
181	///
182	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ps)
183	#[inline]
184	#[target_feature(enable = "fma")]
185	#[cfg_attr(test, assert_instr(vfmsub213ps))]
186	#[stable(feature = "simd_x86", since = "1.27.0")]
187	pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
188	vfmsubps(a, b, c)
189	}
190
191	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
192	/// and `b`, and subtract packed elements in `c` from the intermediate result.
193	///
194	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_ps)
195	#[inline]
196	#[target_feature(enable = "fma")]
197	#[cfg_attr(test, assert_instr(vfmsub213ps))]
198	#[stable(feature = "simd_x86", since = "1.27.0")]
199	pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
200	vfmsubps256(a, b, c)
201	}
202
203	/// Multiplies the lower double-precision (64-bit) floating-point elements in
204	/// `a` and `b`, and subtract the lower element in `c` from the intermediate
205	/// result. Store the result in the lower element of the returned value, and
206	/// copy the upper element from `a` to the upper elements of the result.
207	///
208	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_sd)
209	#[inline]
210	#[target_feature(enable = "fma")]
211	#[cfg_attr(test, assert_instr(vfmsub))]
212	#[stable(feature = "simd_x86", since = "1.27.0")]
213	pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
214	vfmsubsd(a, b, c)
215	}
216
217	/// Multiplies the lower single-precision (32-bit) floating-point elements in
218	/// `a` and `b`, and subtract the lower element in `c` from the intermediate
219	/// result. Store the result in the lower element of the returned value, and
220	/// copy the 3 upper elements from `a` to the upper elements of the result.
221	///
222	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ss)
223	#[inline]
224	#[target_feature(enable = "fma")]
225	#[cfg_attr(test, assert_instr(vfmsub))]
226	#[stable(feature = "simd_x86", since = "1.27.0")]
227	pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
228	vfmsubss(a, b, c)
229	}
230
231	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
232	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
233	/// the intermediate result.
234	///
235	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd)
236	#[inline]
237	#[target_feature(enable = "fma")]
238	#[cfg_attr(test, assert_instr(vfmsubadd))]
239	#[stable(feature = "simd_x86", since = "1.27.0")]
240	pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
241	vfmsubaddpd(a, b, c)
242	}
243
244	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
245	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
246	/// the intermediate result.
247	///
248	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd)
249	#[inline]
250	#[target_feature(enable = "fma")]
251	#[cfg_attr(test, assert_instr(vfmsubadd))]
252	#[stable(feature = "simd_x86", since = "1.27.0")]
253	pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
254	vfmsubaddpd256(a, b, c)
255	}
256
257	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
258	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
259	/// the intermediate result.
260	///
261	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps)
262	#[inline]
263	#[target_feature(enable = "fma")]
264	#[cfg_attr(test, assert_instr(vfmsubadd))]
265	#[stable(feature = "simd_x86", since = "1.27.0")]
266	pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
267	vfmsubaddps(a, b, c)
268	}
269
270	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
271	/// and `b`, and alternatively subtract and add packed elements in `c` from/to
272	/// the intermediate result.
273	///
274	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps)
275	#[inline]
276	#[target_feature(enable = "fma")]
277	#[cfg_attr(test, assert_instr(vfmsubadd))]
278	#[stable(feature = "simd_x86", since = "1.27.0")]
279	pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
280	vfmsubaddps256(a, b, c)
281	}
282
283	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
284	/// and `b`, and add the negated intermediate result to packed elements in `c`.
285	///
286	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd)
287	#[inline]
288	#[target_feature(enable = "fma")]
289	#[cfg_attr(test, assert_instr(vfnmadd))]
290	#[stable(feature = "simd_x86", since = "1.27.0")]
291	pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
292	vfnmaddpd(a, b, c)
293	}
294
295	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
296	/// and `b`, and add the negated intermediate result to packed elements in `c`.
297	///
298	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd)
299	#[inline]
300	#[target_feature(enable = "fma")]
301	#[cfg_attr(test, assert_instr(vfnmadd))]
302	#[stable(feature = "simd_x86", since = "1.27.0")]
303	pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
304	vfnmaddpd256(a, b, c)
305	}
306
307	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
308	/// and `b`, and add the negated intermediate result to packed elements in `c`.
309	///
310	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps)
311	#[inline]
312	#[target_feature(enable = "fma")]
313	#[cfg_attr(test, assert_instr(vfnmadd))]
314	#[stable(feature = "simd_x86", since = "1.27.0")]
315	pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
316	vfnmaddps(a, b, c)
317	}
318
319	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
320	/// and `b`, and add the negated intermediate result to packed elements in `c`.
321	///
322	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps)
323	#[inline]
324	#[target_feature(enable = "fma")]
325	#[cfg_attr(test, assert_instr(vfnmadd))]
326	#[stable(feature = "simd_x86", since = "1.27.0")]
327	pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
328	vfnmaddps256(a, b, c)
329	}
330
331	/// Multiplies the lower double-precision (64-bit) floating-point elements in
332	/// `a` and `b`, and add the negated intermediate result to the lower element
333	/// in `c`. Store the result in the lower element of the returned value, and
334	/// copy the upper element from `a` to the upper elements of the result.
335	///
336	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_sd)
337	#[inline]
338	#[target_feature(enable = "fma")]
339	#[cfg_attr(test, assert_instr(vfnmadd))]
340	#[stable(feature = "simd_x86", since = "1.27.0")]
341	pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
342	vfnmaddsd(a, b, c)
343	}
344
345	/// Multiplies the lower single-precision (32-bit) floating-point elements in
346	/// `a` and `b`, and add the negated intermediate result to the lower element
347	/// in `c`. Store the result in the lower element of the returned value, and
348	/// copy the 3 upper elements from `a` to the upper elements of the result.
349	///
350	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ss)
351	#[inline]
352	#[target_feature(enable = "fma")]
353	#[cfg_attr(test, assert_instr(vfnmadd))]
354	#[stable(feature = "simd_x86", since = "1.27.0")]
355	pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
356	vfnmaddss(a, b, c)
357	}
358
359	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
360	/// and `b`, and subtract packed elements in `c` from the negated intermediate
361	/// result.
362	///
363	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_pd)
364	#[inline]
365	#[target_feature(enable = "fma")]
366	#[cfg_attr(test, assert_instr(vfnmsub))]
367	#[stable(feature = "simd_x86", since = "1.27.0")]
368	pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
369	vfnmsubpd(a, b, c)
370	}
371
372	/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
373	/// and `b`, and subtract packed elements in `c` from the negated intermediate
374	/// result.
375	///
376	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_pd)
377	#[inline]
378	#[target_feature(enable = "fma")]
379	#[cfg_attr(test, assert_instr(vfnmsub))]
380	#[stable(feature = "simd_x86", since = "1.27.0")]
381	pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
382	vfnmsubpd256(a, b, c)
383	}
384
385	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
386	/// and `b`, and subtract packed elements in `c` from the negated intermediate
387	/// result.
388	///
389	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ps)
390	#[inline]
391	#[target_feature(enable = "fma")]
392	#[cfg_attr(test, assert_instr(vfnmsub))]
393	#[stable(feature = "simd_x86", since = "1.27.0")]
394	pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
395	vfnmsubps(a, b, c)
396	}
397
398	/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
399	/// and `b`, and subtract packed elements in `c` from the negated intermediate
400	/// result.
401	///
402	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_ps)
403	#[inline]
404	#[target_feature(enable = "fma")]
405	#[cfg_attr(test, assert_instr(vfnmsub))]
406	#[stable(feature = "simd_x86", since = "1.27.0")]
407	pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
408	vfnmsubps256(a, b, c)
409	}
410
411	/// Multiplies the lower double-precision (64-bit) floating-point elements in
412	/// `a` and `b`, and subtract packed elements in `c` from the negated
413	/// intermediate result. Store the result in the lower element of the returned
414	/// value, and copy the upper element from `a` to the upper elements of the
415	/// result.
416	///
417	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_sd)
418	#[inline]
419	#[target_feature(enable = "fma")]
420	#[cfg_attr(test, assert_instr(vfnmsub))]
421	#[stable(feature = "simd_x86", since = "1.27.0")]
422	pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
423	vfnmsubsd(a, b, c)
424	}
425
426	/// Multiplies the lower single-precision (32-bit) floating-point elements in
427	/// `a` and `b`, and subtract packed elements in `c` from the negated
428	/// intermediate result. Store the result in the lower element of the
429	/// returned value, and copy the 3 upper elements from `a` to the upper
430	/// elements of the result.
431	///
432	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ss)
433	#[inline]
434	#[target_feature(enable = "fma")]
435	#[cfg_attr(test, assert_instr(vfnmsub))]
436	#[stable(feature = "simd_x86", since = "1.27.0")]
437	pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
438	vfnmsubss(a, b, c)
439	}
440
441	#[allow(improper_ctypes)]
442	extern "C" {
443	#[link_name = "llvm.x86.fma.vfmadd.sd"]
444	fn vfmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
445	#[link_name = "llvm.x86.fma.vfmadd.ss"]
446	fn vfmaddss(a: __m128, b: __m128, c: __m128) -> __m128;
447	#[link_name = "llvm.x86.fma.vfmaddsub.pd"]
448	fn vfmaddsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
449	#[link_name = "llvm.x86.fma.vfmaddsub.pd.256"]
450	fn vfmaddsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
451	#[link_name = "llvm.x86.fma.vfmaddsub.ps"]
452	fn vfmaddsubps(a: __m128, b: __m128, c: __m128) -> __m128;
453	#[link_name = "llvm.x86.fma.vfmaddsub.ps.256"]
454	fn vfmaddsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
455	#[link_name = "llvm.x86.fma.vfmsub.pd"]
456	fn vfmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
457	#[link_name = "llvm.x86.fma.vfmsub.pd.256"]
458	fn vfmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
459	#[link_name = "llvm.x86.fma.vfmsub.ps"]
460	fn vfmsubps(a: __m128, b: __m128, c: __m128) -> __m128;
461	#[link_name = "llvm.x86.fma.vfmsub.ps.256"]
462	fn vfmsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
463	#[link_name = "llvm.x86.fma.vfmsub.sd"]
464	fn vfmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
465	#[link_name = "llvm.x86.fma.vfmsub.ss"]
466	fn vfmsubss(a: __m128, b: __m128, c: __m128) -> __m128;
467	#[link_name = "llvm.x86.fma.vfmsubadd.pd"]
468	fn vfmsubaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
469	#[link_name = "llvm.x86.fma.vfmsubadd.pd.256"]
470	fn vfmsubaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
471	#[link_name = "llvm.x86.fma.vfmsubadd.ps"]
472	fn vfmsubaddps(a: __m128, b: __m128, c: __m128) -> __m128;
473	#[link_name = "llvm.x86.fma.vfmsubadd.ps.256"]
474	fn vfmsubaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
475	#[link_name = "llvm.x86.fma.vfnmadd.pd"]
476	fn vfnmaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
477	#[link_name = "llvm.x86.fma.vfnmadd.pd.256"]
478	fn vfnmaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
479	#[link_name = "llvm.x86.fma.vfnmadd.ps"]
480	fn vfnmaddps(a: __m128, b: __m128, c: __m128) -> __m128;
481	#[link_name = "llvm.x86.fma.vfnmadd.ps.256"]
482	fn vfnmaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
483	#[link_name = "llvm.x86.fma.vfnmadd.sd"]
484	fn vfnmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
485	#[link_name = "llvm.x86.fma.vfnmadd.ss"]
486	fn vfnmaddss(a: __m128, b: __m128, c: __m128) -> __m128;
487	#[link_name = "llvm.x86.fma.vfnmsub.pd"]
488	fn vfnmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
489	#[link_name = "llvm.x86.fma.vfnmsub.pd.256"]
490	fn vfnmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
491	#[link_name = "llvm.x86.fma.vfnmsub.ps"]
492	fn vfnmsubps(a: __m128, b: __m128, c: __m128) -> __m128;
493	#[link_name = "llvm.x86.fma.vfnmsub.ps.256"]
494	fn vfnmsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
495	#[link_name = "llvm.x86.fma.vfnmsub.sd"]
496	fn vfnmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
497	#[link_name = "llvm.x86.fma.vfnmsub.ss"]
498	fn vfnmsubss(a: __m128, b: __m128, c: __m128) -> __m128;
499	}
500
// Unit tests for the FMA intrinsics. Every test feeds small exact inputs
// through one intrinsic and compares the result against a hand-computed vector.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // --- fmadd: (a * b) + c ---

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(9., 15.);
        assert_eq_m128d(_mm_fmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(9., 15., 22., 15.);
        assert_eq_m256d(_mm256_fmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(9., 15., 22., 15.);
        assert_eq_m128(_mm_fmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(9., 15., 22., 15., -5., -49., -2., -31.);
        assert_eq_m256(_mm256_fmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(9., 2.);
        assert_eq_m128d(_mm_fmadd_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(9., 2., 3., 4.);
        assert_eq_m128(_mm_fmadd_ss(a, b, c), r);
    }

    // --- fmaddsub: even lanes (a * b) - c, odd lanes (a * b) + c ---

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmaddsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(1., 15.);
        assert_eq_m128d(_mm_fmaddsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmaddsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(1., 15., 20., 15.);
        assert_eq_m256d(_mm256_fmaddsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmaddsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(1., 15., 20., 15.);
        assert_eq_m128(_mm_fmaddsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmaddsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(1., 15., 20., 15., 5., -49., 2., -31.);
        assert_eq_m256(_mm256_fmaddsub_ps(a, b, c), r);
    }

    // --- fmsub: (a * b) - c ---

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(1., -3.);
        assert_eq_m128d(_mm_fmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(1., -3., 20., 1.);
        assert_eq_m256d(_mm256_fmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(1., -3., 20., 1.);
        assert_eq_m128(_mm_fmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(1., -3., 20., 1., 5., -71., 2., -25.);
        assert_eq_m256(_mm256_fmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(1., 2.);
        assert_eq_m128d(_mm_fmsub_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(1., 2., 3., 4.);
        assert_eq_m128(_mm_fmsub_ss(a, b, c), r);
    }

    // --- fmsubadd: even lanes (a * b) + c, odd lanes (a * b) - c ---

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsubadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(9., -3.);
        assert_eq_m128d(_mm_fmsubadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsubadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(9., -3., 22., 1.);
        assert_eq_m256d(_mm256_fmsubadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsubadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(9., -3., 22., 1.);
        assert_eq_m128(_mm_fmsubadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsubadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(9., -3., 22., 1., -5., -71., -2., -25.);
        assert_eq_m256(_mm256_fmsubadd_ps(a, b, c), r);
    }

    // --- fnmadd: -(a * b) + c ---

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-1., 3.);
        assert_eq_m128d(_mm_fnmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(-1., 3., -20., -1.);
        assert_eq_m256d(_mm256_fnmadd_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-1., 3., -20., -1.);
        assert_eq_m128(_mm_fnmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(-1., 3., -20., -1., -5., 71., -2., 25.);
        assert_eq_m256(_mm256_fnmadd_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-1., 2.);
        assert_eq_m128d(_mm_fnmadd_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-1., 2., 3., 4.);
        assert_eq_m128(_mm_fnmadd_ss(a, b, c), r);
    }

    // --- fnmsub: -(a * b) - c ---

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-9., -15.);
        assert_eq_m128d(_mm_fnmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let r = _mm256_setr_pd(-9., -15., -22., -15.);
        assert_eq_m256d(_mm256_fnmsub_pd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-9., -15., -22., -15.);
        assert_eq_m128(_mm_fnmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let r = _mm256_setr_ps(-9., -15., -22., -15., 5., 49., 2., 31.);
        assert_eq_m256(_mm256_fnmsub_ps(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let r = _mm_setr_pd(-9., 2.);
        assert_eq_m128d(_mm_fnmsub_sd(a, b, c), r);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let r = _mm_setr_ps(-9., 2., 3., 4.);
        assert_eq_m128(_mm_fnmsub_ss(a, b, c), r);
    }
}
796