1 | use crate::core_arch::x86::*; |
2 | |
3 | #[cfg (test)] |
4 | use stdarch_test::assert_instr; |
5 | |
6 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
7 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
8 | /// unsigned integer from the intermediate result with the |
9 | /// corresponding unsigned 64-bit integer in `a`, and store the |
10 | /// results in `dst`. |
11 | /// |
12 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512IFMA52&expand=3488) |
13 | #[inline ] |
14 | #[target_feature (enable = "avx512ifma" )] |
15 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
16 | #[cfg_attr (test, assert_instr(vpmadd52huq))] |
17 | pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
18 | vpmadd52huq_512(z:a, x:b, y:c) |
19 | } |
20 | |
21 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
22 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
23 | /// unsigned integer from the intermediate result with the |
24 | /// corresponding unsigned 64-bit integer in `a`, and store the |
25 | /// results in `dst`. |
26 | /// |
27 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3497&avx512techs=AVX512IFMA52) |
28 | #[inline ] |
29 | #[target_feature (enable = "avx512ifma" )] |
30 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
31 | #[cfg_attr (test, assert_instr(vpmadd52luq))] |
32 | pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { |
33 | vpmadd52luq_512(z:a, x:b, y:c) |
34 | } |
35 | |
36 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
37 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
38 | /// unsigned integer from the intermediate result with the |
39 | /// corresponding unsigned 64-bit integer in `a`, and store the |
40 | /// results in `dst`. |
41 | /// |
42 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485) |
43 | #[inline ] |
44 | #[target_feature (enable = "avx512ifma,avx512vl" )] |
45 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
46 | #[cfg_attr (test, assert_instr(vpmadd52huq))] |
47 | pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
48 | vpmadd52huq_256(z:a, x:b, y:c) |
49 | } |
50 | |
51 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
52 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
53 | /// unsigned integer from the intermediate result with the |
54 | /// corresponding unsigned 64-bit integer in `a`, and store the |
55 | /// results in `dst`. |
56 | /// |
57 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494) |
58 | #[inline ] |
59 | #[target_feature (enable = "avx512ifma,avx512vl" )] |
60 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
61 | #[cfg_attr (test, assert_instr(vpmadd52luq))] |
62 | pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { |
63 | vpmadd52luq_256(z:a, x:b, y:c) |
64 | } |
65 | |
66 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
67 | /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit |
68 | /// unsigned integer from the intermediate result with the |
69 | /// corresponding unsigned 64-bit integer in `a`, and store the |
70 | /// results in `dst`. |
71 | /// |
72 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3482&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL) |
73 | #[inline ] |
74 | #[target_feature (enable = "avx512ifma,avx512vl" )] |
75 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
76 | #[cfg_attr (test, assert_instr(vpmadd52huq))] |
77 | pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
78 | vpmadd52huq_128(z:a, x:b, y:c) |
79 | } |
80 | |
81 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of |
82 | /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit |
83 | /// unsigned integer from the intermediate result with the |
84 | /// corresponding unsigned 64-bit integer in `a`, and store the |
85 | /// results in `dst`. |
86 | /// |
87 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3491&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL) |
88 | #[inline ] |
89 | #[target_feature (enable = "avx512ifma,avx512vl" )] |
90 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
91 | #[cfg_attr (test, assert_instr(vpmadd52luq))] |
92 | pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { |
93 | vpmadd52luq_128(z:a, x:b, y:c) |
94 | } |
95 | |
96 | #[allow (improper_ctypes)] |
97 | extern "C" { |
98 | #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128" ] |
99 | fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i; |
100 | #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128" ] |
101 | fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i; |
102 | #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256" ] |
103 | fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i; |
104 | #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256" ] |
105 | fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i; |
106 | #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512" ] |
107 | fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i; |
108 | #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512" ] |
109 | fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i; |
110 | } |
111 | |
// Every test splats the same three operands across all lanes:
//   a = 10 << 40, b = (11 << 40) + 4, c = (12 << 40) + 3
// so b * c = 132 * 2^80 + 81 * 2^40 + 12, which gives
//   high 52 bits: (b * c) >> 52        = 132 << 28
//   low 52 bits:  (b * c) % (1 << 52)  = (81 << 40) + 12
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let mut a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        a = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(a, expected);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let mut a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        a = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let mut a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        a = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let mut a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        a = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let mut a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        a = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let mut a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        // Exercise the low-half intrinsic that this test is named after.
        a = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(a, expected);
    }
}
203 | |