1//! Fused Multiply-Add instruction set (FMA)
2//!
3//! The FMA instruction set is an extension to the 128 and 256-bit SSE
4//! instructions in the x86 microprocessor instruction set to perform fused
5//! multiply–add (FMA) operations.
6//!
7//! The references are:
8//!
9//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
10//! Instruction Set Reference, A-Z][intel64_ref].
11//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
12//! System Instructions][amd64_ref].
13//!
14//! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the
15//! instructions available.
16//!
17//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
18//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
19//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
20
21use crate::core_arch::simd_llvm::simd_fma;
22use crate::core_arch::x86::*;
23
24#[cfg(test)]
25use stdarch_test::assert_instr;
26
27/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
28/// and `b`, and add the intermediate result to packed elements in `c`.
29///
30/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_pd)
31#[inline]
32#[target_feature(enable = "fma")]
33#[cfg_attr(test, assert_instr(vfmadd))]
34#[stable(feature = "simd_x86", since = "1.27.0")]
35pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36 simd_fma(a, b, c)
37}
38
39/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
40/// and `b`, and add the intermediate result to packed elements in `c`.
41///
42/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_pd)
43#[inline]
44#[target_feature(enable = "fma")]
45#[cfg_attr(test, assert_instr(vfmadd))]
46#[stable(feature = "simd_x86", since = "1.27.0")]
47pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
48 simd_fma(a, b, c)
49}
50
51/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
52/// and `b`, and add the intermediate result to packed elements in `c`.
53///
54/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ps)
55#[inline]
56#[target_feature(enable = "fma")]
57#[cfg_attr(test, assert_instr(vfmadd))]
58#[stable(feature = "simd_x86", since = "1.27.0")]
59pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
60 simd_fma(a, b, c)
61}
62
63/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
64/// and `b`, and add the intermediate result to packed elements in `c`.
65///
66/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmadd_ps)
67#[inline]
68#[target_feature(enable = "fma")]
69#[cfg_attr(test, assert_instr(vfmadd))]
70#[stable(feature = "simd_x86", since = "1.27.0")]
71pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
72 simd_fma(a, b, c)
73}
74
75/// Multiplies the lower double-precision (64-bit) floating-point elements in
76/// `a` and `b`, and add the intermediate result to the lower element in `c`.
77/// Stores the result in the lower element of the returned value, and copy the
78/// upper element from `a` to the upper elements of the result.
79///
80/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_sd)
81#[inline]
82#[target_feature(enable = "fma")]
83#[cfg_attr(test, assert_instr(vfmadd))]
84#[stable(feature = "simd_x86", since = "1.27.0")]
85pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
86 vfmaddsd(a, b, c)
87}
88
89/// Multiplies the lower single-precision (32-bit) floating-point elements in
90/// `a` and `b`, and add the intermediate result to the lower element in `c`.
91/// Stores the result in the lower element of the returned value, and copy the
92/// 3 upper elements from `a` to the upper elements of the result.
93///
94/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmadd_ss)
95#[inline]
96#[target_feature(enable = "fma")]
97#[cfg_attr(test, assert_instr(vfmadd))]
98#[stable(feature = "simd_x86", since = "1.27.0")]
99pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
100 vfmaddss(a, b, c)
101}
102
103/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
104/// and `b`, and alternatively add and subtract packed elements in `c` to/from
105/// the intermediate result.
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd)
108#[inline]
109#[target_feature(enable = "fma")]
110#[cfg_attr(test, assert_instr(vfmaddsub))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
113 vfmaddsubpd(a, b, c)
114}
115
116/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
117/// and `b`, and alternatively add and subtract packed elements in `c` to/from
118/// the intermediate result.
119///
120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd)
121#[inline]
122#[target_feature(enable = "fma")]
123#[cfg_attr(test, assert_instr(vfmaddsub))]
124#[stable(feature = "simd_x86", since = "1.27.0")]
125pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
126 vfmaddsubpd256(a, b, c)
127}
128
129/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
130/// and `b`, and alternatively add and subtract packed elements in `c` to/from
131/// the intermediate result.
132///
133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps)
134#[inline]
135#[target_feature(enable = "fma")]
136#[cfg_attr(test, assert_instr(vfmaddsub))]
137#[stable(feature = "simd_x86", since = "1.27.0")]
138pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
139 vfmaddsubps(a, b, c)
140}
141
142/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
143/// and `b`, and alternatively add and subtract packed elements in `c` to/from
144/// the intermediate result.
145///
146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps)
147#[inline]
148#[target_feature(enable = "fma")]
149#[cfg_attr(test, assert_instr(vfmaddsub))]
150#[stable(feature = "simd_x86", since = "1.27.0")]
151pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
152 vfmaddsubps256(a, b, c)
153}
154
155/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
156/// and `b`, and subtract packed elements in `c` from the intermediate result.
157///
158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_pd)
159#[inline]
160#[target_feature(enable = "fma")]
161#[cfg_attr(test, assert_instr(vfmsub))]
162#[stable(feature = "simd_x86", since = "1.27.0")]
163pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
164 vfmsubpd(a, b, c)
165}
166
167/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
168/// and `b`, and subtract packed elements in `c` from the intermediate result.
169///
170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_pd)
171#[inline]
172#[target_feature(enable = "fma")]
173#[cfg_attr(test, assert_instr(vfmsub))]
174#[stable(feature = "simd_x86", since = "1.27.0")]
175pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
176 vfmsubpd256(a, b, c)
177}
178
179/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
180/// and `b`, and subtract packed elements in `c` from the intermediate result.
181///
182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ps)
183#[inline]
184#[target_feature(enable = "fma")]
185#[cfg_attr(test, assert_instr(vfmsub213ps))]
186#[stable(feature = "simd_x86", since = "1.27.0")]
187pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
188 vfmsubps(a, b, c)
189}
190
191/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
192/// and `b`, and subtract packed elements in `c` from the intermediate result.
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsub_ps)
195#[inline]
196#[target_feature(enable = "fma")]
197#[cfg_attr(test, assert_instr(vfmsub213ps))]
198#[stable(feature = "simd_x86", since = "1.27.0")]
199pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
200 vfmsubps256(a, b, c)
201}
202
203/// Multiplies the lower double-precision (64-bit) floating-point elements in
204/// `a` and `b`, and subtract the lower element in `c` from the intermediate
205/// result. Store the result in the lower element of the returned value, and
206/// copy the upper element from `a` to the upper elements of the result.
207///
208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_sd)
209#[inline]
210#[target_feature(enable = "fma")]
211#[cfg_attr(test, assert_instr(vfmsub))]
212#[stable(feature = "simd_x86", since = "1.27.0")]
213pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
214 vfmsubsd(a, b, c)
215}
216
217/// Multiplies the lower single-precision (32-bit) floating-point elements in
218/// `a` and `b`, and subtract the lower element in `c` from the intermediate
219/// result. Store the result in the lower element of the returned value, and
220/// copy the 3 upper elements from `a` to the upper elements of the result.
221///
222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsub_ss)
223#[inline]
224#[target_feature(enable = "fma")]
225#[cfg_attr(test, assert_instr(vfmsub))]
226#[stable(feature = "simd_x86", since = "1.27.0")]
227pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
228 vfmsubss(a, b, c)
229}
230
231/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
232/// and `b`, and alternatively subtract and add packed elements in `c` from/to
233/// the intermediate result.
234///
235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd)
236#[inline]
237#[target_feature(enable = "fma")]
238#[cfg_attr(test, assert_instr(vfmsubadd))]
239#[stable(feature = "simd_x86", since = "1.27.0")]
240pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
241 vfmsubaddpd(a, b, c)
242}
243
244/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
245/// and `b`, and alternatively subtract and add packed elements in `c` from/to
246/// the intermediate result.
247///
248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd)
249#[inline]
250#[target_feature(enable = "fma")]
251#[cfg_attr(test, assert_instr(vfmsubadd))]
252#[stable(feature = "simd_x86", since = "1.27.0")]
253pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
254 vfmsubaddpd256(a, b, c)
255}
256
257/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
258/// and `b`, and alternatively subtract and add packed elements in `c` from/to
259/// the intermediate result.
260///
261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps)
262#[inline]
263#[target_feature(enable = "fma")]
264#[cfg_attr(test, assert_instr(vfmsubadd))]
265#[stable(feature = "simd_x86", since = "1.27.0")]
266pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
267 vfmsubaddps(a, b, c)
268}
269
270/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
271/// and `b`, and alternatively subtract and add packed elements in `c` from/to
272/// the intermediate result.
273///
274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps)
275#[inline]
276#[target_feature(enable = "fma")]
277#[cfg_attr(test, assert_instr(vfmsubadd))]
278#[stable(feature = "simd_x86", since = "1.27.0")]
279pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
280 vfmsubaddps256(a, b, c)
281}
282
283/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
284/// and `b`, and add the negated intermediate result to packed elements in `c`.
285///
286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd)
287#[inline]
288#[target_feature(enable = "fma")]
289#[cfg_attr(test, assert_instr(vfnmadd))]
290#[stable(feature = "simd_x86", since = "1.27.0")]
291pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
292 vfnmaddpd(a, b, c)
293}
294
295/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
296/// and `b`, and add the negated intermediate result to packed elements in `c`.
297///
298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd)
299#[inline]
300#[target_feature(enable = "fma")]
301#[cfg_attr(test, assert_instr(vfnmadd))]
302#[stable(feature = "simd_x86", since = "1.27.0")]
303pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
304 vfnmaddpd256(a, b, c)
305}
306
307/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
308/// and `b`, and add the negated intermediate result to packed elements in `c`.
309///
310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps)
311#[inline]
312#[target_feature(enable = "fma")]
313#[cfg_attr(test, assert_instr(vfnmadd))]
314#[stable(feature = "simd_x86", since = "1.27.0")]
315pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
316 vfnmaddps(a, b, c)
317}
318
319/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
320/// and `b`, and add the negated intermediate result to packed elements in `c`.
321///
322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps)
323#[inline]
324#[target_feature(enable = "fma")]
325#[cfg_attr(test, assert_instr(vfnmadd))]
326#[stable(feature = "simd_x86", since = "1.27.0")]
327pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
328 vfnmaddps256(a, b, c)
329}
330
331/// Multiplies the lower double-precision (64-bit) floating-point elements in
332/// `a` and `b`, and add the negated intermediate result to the lower element
333/// in `c`. Store the result in the lower element of the returned value, and
334/// copy the upper element from `a` to the upper elements of the result.
335///
336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_sd)
337#[inline]
338#[target_feature(enable = "fma")]
339#[cfg_attr(test, assert_instr(vfnmadd))]
340#[stable(feature = "simd_x86", since = "1.27.0")]
341pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
342 vfnmaddsd(a, b, c)
343}
344
345/// Multiplies the lower single-precision (32-bit) floating-point elements in
346/// `a` and `b`, and add the negated intermediate result to the lower element
347/// in `c`. Store the result in the lower element of the returned value, and
348/// copy the 3 upper elements from `a` to the upper elements of the result.
349///
350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ss)
351#[inline]
352#[target_feature(enable = "fma")]
353#[cfg_attr(test, assert_instr(vfnmadd))]
354#[stable(feature = "simd_x86", since = "1.27.0")]
355pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
356 vfnmaddss(a, b, c)
357}
358
359/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
360/// and `b`, and subtract packed elements in `c` from the negated intermediate
361/// result.
362///
363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_pd)
364#[inline]
365#[target_feature(enable = "fma")]
366#[cfg_attr(test, assert_instr(vfnmsub))]
367#[stable(feature = "simd_x86", since = "1.27.0")]
368pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
369 vfnmsubpd(a, b, c)
370}
371
372/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
373/// and `b`, and subtract packed elements in `c` from the negated intermediate
374/// result.
375///
376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_pd)
377#[inline]
378#[target_feature(enable = "fma")]
379#[cfg_attr(test, assert_instr(vfnmsub))]
380#[stable(feature = "simd_x86", since = "1.27.0")]
381pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
382 vfnmsubpd256(a, b, c)
383}
384
385/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
386/// and `b`, and subtract packed elements in `c` from the negated intermediate
387/// result.
388///
389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ps)
390#[inline]
391#[target_feature(enable = "fma")]
392#[cfg_attr(test, assert_instr(vfnmsub))]
393#[stable(feature = "simd_x86", since = "1.27.0")]
394pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
395 vfnmsubps(a, b, c)
396}
397
398/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
399/// and `b`, and subtract packed elements in `c` from the negated intermediate
400/// result.
401///
402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmsub_ps)
403#[inline]
404#[target_feature(enable = "fma")]
405#[cfg_attr(test, assert_instr(vfnmsub))]
406#[stable(feature = "simd_x86", since = "1.27.0")]
407pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
408 vfnmsubps256(a, b, c)
409}
410
411/// Multiplies the lower double-precision (64-bit) floating-point elements in
412/// `a` and `b`, and subtract packed elements in `c` from the negated
413/// intermediate result. Store the result in the lower element of the returned
414/// value, and copy the upper element from `a` to the upper elements of the
415/// result.
416///
417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_sd)
418#[inline]
419#[target_feature(enable = "fma")]
420#[cfg_attr(test, assert_instr(vfnmsub))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
423 vfnmsubsd(a, b, c)
424}
425
426/// Multiplies the lower single-precision (32-bit) floating-point elements in
427/// `a` and `b`, and subtract packed elements in `c` from the negated
428/// intermediate result. Store the result in the lower element of the
429/// returned value, and copy the 3 upper elements from `a` to the upper
430/// elements of the result.
431///
432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmsub_ss)
433#[inline]
434#[target_feature(enable = "fma")]
435#[cfg_attr(test, assert_instr(vfnmsub))]
436#[stable(feature = "simd_x86", since = "1.27.0")]
437pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
438 vfnmsubss(a, b, c)
439}
440
441#[allow(improper_ctypes)]
442extern "C" {
443 #[link_name = "llvm.x86.fma.vfmadd.sd"]
444 fn vfmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
445 #[link_name = "llvm.x86.fma.vfmadd.ss"]
446 fn vfmaddss(a: __m128, b: __m128, c: __m128) -> __m128;
447 #[link_name = "llvm.x86.fma.vfmaddsub.pd"]
448 fn vfmaddsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
449 #[link_name = "llvm.x86.fma.vfmaddsub.pd.256"]
450 fn vfmaddsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
451 #[link_name = "llvm.x86.fma.vfmaddsub.ps"]
452 fn vfmaddsubps(a: __m128, b: __m128, c: __m128) -> __m128;
453 #[link_name = "llvm.x86.fma.vfmaddsub.ps.256"]
454 fn vfmaddsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
455 #[link_name = "llvm.x86.fma.vfmsub.pd"]
456 fn vfmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
457 #[link_name = "llvm.x86.fma.vfmsub.pd.256"]
458 fn vfmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
459 #[link_name = "llvm.x86.fma.vfmsub.ps"]
460 fn vfmsubps(a: __m128, b: __m128, c: __m128) -> __m128;
461 #[link_name = "llvm.x86.fma.vfmsub.ps.256"]
462 fn vfmsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
463 #[link_name = "llvm.x86.fma.vfmsub.sd"]
464 fn vfmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
465 #[link_name = "llvm.x86.fma.vfmsub.ss"]
466 fn vfmsubss(a: __m128, b: __m128, c: __m128) -> __m128;
467 #[link_name = "llvm.x86.fma.vfmsubadd.pd"]
468 fn vfmsubaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
469 #[link_name = "llvm.x86.fma.vfmsubadd.pd.256"]
470 fn vfmsubaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
471 #[link_name = "llvm.x86.fma.vfmsubadd.ps"]
472 fn vfmsubaddps(a: __m128, b: __m128, c: __m128) -> __m128;
473 #[link_name = "llvm.x86.fma.vfmsubadd.ps.256"]
474 fn vfmsubaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
475 #[link_name = "llvm.x86.fma.vfnmadd.pd"]
476 fn vfnmaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
477 #[link_name = "llvm.x86.fma.vfnmadd.pd.256"]
478 fn vfnmaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
479 #[link_name = "llvm.x86.fma.vfnmadd.ps"]
480 fn vfnmaddps(a: __m128, b: __m128, c: __m128) -> __m128;
481 #[link_name = "llvm.x86.fma.vfnmadd.ps.256"]
482 fn vfnmaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
483 #[link_name = "llvm.x86.fma.vfnmadd.sd"]
484 fn vfnmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
485 #[link_name = "llvm.x86.fma.vfnmadd.ss"]
486 fn vfnmaddss(a: __m128, b: __m128, c: __m128) -> __m128;
487 #[link_name = "llvm.x86.fma.vfnmsub.pd"]
488 fn vfnmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
489 #[link_name = "llvm.x86.fma.vfnmsub.pd.256"]
490 fn vfnmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
491 #[link_name = "llvm.x86.fma.vfnmsub.ps"]
492 fn vfnmsubps(a: __m128, b: __m128, c: __m128) -> __m128;
493 #[link_name = "llvm.x86.fma.vfnmsub.ps.256"]
494 fn vfnmsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
495 #[link_name = "llvm.x86.fma.vfnmsub.sd"]
496 fn vfnmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
497 #[link_name = "llvm.x86.fma.vfnmsub.ss"]
498 fn vfnmsubss(a: __m128, b: __m128, c: __m128) -> __m128;
499}
500
#[cfg(test)]
mod tests {
    //! One test per FMA intrinsic. Each test feeds small exactly-representable
    //! inputs through the intrinsic and compares against a hand-computed
    //! vector; the formula in the comment is what the expected values encode.

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // ---- fmadd: a * b + c ----

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(9., 15.);
        let got = _mm_fmadd_pd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let expected = _mm256_setr_pd(9., 15., 22., 15.);
        let got = _mm256_fmadd_pd(a, b, c);
        assert_eq_m256d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(9., 15., 22., 15.);
        let got = _mm_fmadd_ps(a, b, c);
        assert_eq_m128(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let expected = _mm256_setr_ps(9., 15., 22., 15., -5., -49., -2., -31.);
        let got = _mm256_fmadd_ps(a, b, c);
        assert_eq_m256(got, expected);
    }

    // Scalar forms: only element 0 is computed, the rest comes from `a`.

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(9., 2.);
        let got = _mm_fmadd_sd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmadd_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(9., 2., 3., 4.);
        let got = _mm_fmadd_ss(a, b, c);
        assert_eq_m128(got, expected);
    }

    // ---- fmaddsub: even elements a * b - c, odd elements a * b + c ----

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmaddsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(1., 15.);
        let got = _mm_fmaddsub_pd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmaddsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let expected = _mm256_setr_pd(1., 15., 20., 15.);
        let got = _mm256_fmaddsub_pd(a, b, c);
        assert_eq_m256d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmaddsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(1., 15., 20., 15.);
        let got = _mm_fmaddsub_ps(a, b, c);
        assert_eq_m128(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmaddsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let expected = _mm256_setr_ps(1., 15., 20., 15., 5., -49., 2., -31.);
        let got = _mm256_fmaddsub_ps(a, b, c);
        assert_eq_m256(got, expected);
    }

    // ---- fmsub: a * b - c ----

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(1., -3.);
        let got = _mm_fmsub_pd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let expected = _mm256_setr_pd(1., -3., 20., 1.);
        let got = _mm256_fmsub_pd(a, b, c);
        assert_eq_m256d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(1., -3., 20., 1.);
        let got = _mm_fmsub_ps(a, b, c);
        assert_eq_m128(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let expected = _mm256_setr_ps(1., -3., 20., 1., 5., -71., 2., -25.);
        let got = _mm256_fmsub_ps(a, b, c);
        assert_eq_m256(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(1., 2.);
        let got = _mm_fmsub_sd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsub_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(1., 2., 3., 4.);
        let got = _mm_fmsub_ss(a, b, c);
        assert_eq_m128(got, expected);
    }

    // ---- fmsubadd: even elements a * b + c, odd elements a * b - c ----

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsubadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(9., -3.);
        let got = _mm_fmsubadd_pd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsubadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let expected = _mm256_setr_pd(9., -3., 22., 1.);
        let got = _mm256_fmsubadd_pd(a, b, c);
        assert_eq_m256d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fmsubadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(9., -3., 22., 1.);
        let got = _mm_fmsubadd_ps(a, b, c);
        assert_eq_m128(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fmsubadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let expected = _mm256_setr_ps(9., -3., 22., 1., -5., -71., -2., -25.);
        let got = _mm256_fmsubadd_ps(a, b, c);
        assert_eq_m256(got, expected);
    }

    // ---- fnmadd: -(a * b) + c ----

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(-1., 3.);
        let got = _mm_fnmadd_pd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmadd_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let expected = _mm256_setr_pd(-1., 3., -20., -1.);
        let got = _mm256_fnmadd_pd(a, b, c);
        assert_eq_m256d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(-1., 3., -20., -1.);
        let got = _mm_fnmadd_ps(a, b, c);
        assert_eq_m128(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmadd_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let expected = _mm256_setr_ps(-1., 3., -20., -1., -5., 71., -2., 25.);
        let got = _mm256_fnmadd_ps(a, b, c);
        assert_eq_m256(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(-1., 2.);
        let got = _mm_fnmadd_sd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmadd_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(-1., 2., 3., 4.);
        let got = _mm_fnmadd_ss(a, b, c);
        assert_eq_m128(got, expected);
    }

    // ---- fnmsub: -(a * b) - c ----

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(-9., -15.);
        let got = _mm_fnmsub_pd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 3., 7., 2.);
        let c = _mm256_setr_pd(4., 9., 1., 7.);
        let expected = _mm256_setr_pd(-9., -15., -22., -15.);
        let got = _mm256_fnmsub_pd(a, b, c);
        assert_eq_m256d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_ps() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(-9., -15., -22., -15.);
        let got = _mm_fnmsub_ps(a, b, c);
        assert_eq_m128(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm256_fnmsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
        let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
        let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
        let expected = _mm256_setr_ps(-9., -15., -22., -15., 5., 49., 2., 31.);
        let got = _mm256_fnmsub_ps(a, b, c);
        assert_eq_m256(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 3.);
        let c = _mm_setr_pd(4., 9.);
        let expected = _mm_setr_pd(-9., 2.);
        let got = _mm_fnmsub_sd(a, b, c);
        assert_eq_m128d(got, expected);
    }

    #[simd_test(enable = "fma")]
    unsafe fn test_mm_fnmsub_ss() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let b = _mm_setr_ps(5., 3., 7., 2.);
        let c = _mm_setr_ps(4., 9., 1., 7.);
        let expected = _mm_setr_ps(-9., 2., 3., 4.);
        let got = _mm_fnmsub_ss(a, b, c);
        assert_eq_m128(got, expected);
    }
}
796