1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply packed unsigned 52-bit integers in each 64-bit element of
8/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
9/// unsigned integer from the intermediate result with the
10/// corresponding unsigned 64-bit integer in `a`, and store the
11/// results in `dst`.
12///
13/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64)
14#[inline]
15#[target_feature(enable = "avx512ifma")]
16#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17#[cfg_attr(test, assert_instr(vpmadd52huq))]
18pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
19 unsafe { vpmadd52huq_512(z:a, x:b, y:c) }
20}
21
22/// Multiply packed unsigned 52-bit integers in each 64-bit element of
23/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
24/// unsigned integer from the intermediate result with the
25/// corresponding unsigned 64-bit integer in `a`, and store the
26/// results in `dst` using writemask `k` (elements are copied
27/// from `k` when the corresponding mask bit is not set).
28///
29/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64)
30#[inline]
31#[target_feature(enable = "avx512ifma")]
32#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33#[cfg_attr(test, assert_instr(vpmadd52huq))]
34pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
35 unsafe { simd_select_bitmask(m:k, yes:vpmadd52huq_512(a, b, c), no:a) }
36}
37
38/// Multiply packed unsigned 52-bit integers in each 64-bit element of
39/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
40/// unsigned integer from the intermediate result with the
41/// corresponding unsigned 64-bit integer in `a`, and store the
42/// results in `dst` using writemask `k` (elements are zeroed
43/// out when the corresponding mask bit is not set).
44///
45/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64)
46#[inline]
47#[target_feature(enable = "avx512ifma")]
48#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
49#[cfg_attr(test, assert_instr(vpmadd52huq))]
50pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
51 unsafe { simd_select_bitmask(m:k, yes:vpmadd52huq_512(a, b, c), no:_mm512_setzero_si512()) }
52}
53
54/// Multiply packed unsigned 52-bit integers in each 64-bit element of
55/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
56/// unsigned integer from the intermediate result with the
57/// corresponding unsigned 64-bit integer in `a`, and store the
58/// results in `dst`.
59///
60/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64)
61#[inline]
62#[target_feature(enable = "avx512ifma")]
63#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
64#[cfg_attr(test, assert_instr(vpmadd52luq))]
65pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
66 unsafe { vpmadd52luq_512(z:a, x:b, y:c) }
67}
68
69/// Multiply packed unsigned 52-bit integers in each 64-bit element of
70/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
71/// unsigned integer from the intermediate result with the
72/// corresponding unsigned 64-bit integer in `a`, and store the
73/// results in `dst` using writemask `k` (elements are copied
74/// from `k` when the corresponding mask bit is not set).
75///
76/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64)
77#[inline]
78#[target_feature(enable = "avx512ifma")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpmadd52luq))]
81pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
82 unsafe { simd_select_bitmask(m:k, yes:vpmadd52luq_512(a, b, c), no:a) }
83}
84
85/// Multiply packed unsigned 52-bit integers in each 64-bit element of
86/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
87/// unsigned integer from the intermediate result with the
88/// corresponding unsigned 64-bit integer in `a`, and store the
89/// results in `dst` using writemask `k` (elements are zeroed
90/// out when the corresponding mask bit is not set).
91///
92/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64)
93#[inline]
94#[target_feature(enable = "avx512ifma")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpmadd52luq))]
97pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
98 unsafe { simd_select_bitmask(m:k, yes:vpmadd52luq_512(a, b, c), no:_mm512_setzero_si512()) }
99}
100
101/// Multiply packed unsigned 52-bit integers in each 64-bit element of
102/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
103/// unsigned integer from the intermediate result with the
104/// corresponding unsigned 64-bit integer in `a`, and store the
105/// results in `dst`.
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
108#[inline]
109#[target_feature(enable = "avxifma")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(
112 all(test, any(target_os = "linux", target_env = "msvc")),
113 assert_instr(vpmadd52huq)
114)]
115pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
116 unsafe { vpmadd52huq_256(z:a, x:b, y:c) }
117}
118
119/// Multiply packed unsigned 52-bit integers in each 64-bit element of
120/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
121/// unsigned integer from the intermediate result with the
122/// corresponding unsigned 64-bit integer in `a`, and store the
123/// results in `dst`.
124///
125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64)
126#[inline]
127#[target_feature(enable = "avx512ifma,avx512vl")]
128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
129#[cfg_attr(test, assert_instr(vpmadd52huq))]
130pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
131 unsafe { vpmadd52huq_256(z:a, x:b, y:c) }
132}
133
134/// Multiply packed unsigned 52-bit integers in each 64-bit element of
135/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
136/// unsigned integer from the intermediate result with the
137/// corresponding unsigned 64-bit integer in `a`, and store the
138/// results in `dst` using writemask `k` (elements are copied
139/// from `k` when the corresponding mask bit is not set).
140///
141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64)
142#[inline]
143#[target_feature(enable = "avx512ifma,avx512vl")]
144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
145#[cfg_attr(test, assert_instr(vpmadd52huq))]
146pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
147 unsafe { simd_select_bitmask(m:k, yes:vpmadd52huq_256(a, b, c), no:a) }
148}
149
150/// Multiply packed unsigned 52-bit integers in each 64-bit element of
151/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
152/// unsigned integer from the intermediate result with the
153/// corresponding unsigned 64-bit integer in `a`, and store the
154/// results in `dst` using writemask `k` (elements are zeroed
155/// out when the corresponding mask bit is not set).
156///
157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64)
158#[inline]
159#[target_feature(enable = "avx512ifma,avx512vl")]
160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
161#[cfg_attr(test, assert_instr(vpmadd52huq))]
162pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
163 unsafe { simd_select_bitmask(m:k, yes:vpmadd52huq_256(a, b, c), no:_mm256_setzero_si256()) }
164}
165
166/// Multiply packed unsigned 52-bit integers in each 64-bit element of
167/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
168/// unsigned integer from the intermediate result with the
169/// corresponding unsigned 64-bit integer in `a`, and store the
170/// results in `dst`.
171///
172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
173#[inline]
174#[target_feature(enable = "avxifma")]
175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
176#[cfg_attr(
177 all(test, any(target_os = "linux", target_env = "msvc")),
178 assert_instr(vpmadd52luq)
179)]
180pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
181 unsafe { vpmadd52luq_256(z:a, x:b, y:c) }
182}
183
184/// Multiply packed unsigned 52-bit integers in each 64-bit element of
185/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
186/// unsigned integer from the intermediate result with the
187/// corresponding unsigned 64-bit integer in `a`, and store the
188/// results in `dst`.
189///
190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64)
191#[inline]
192#[target_feature(enable = "avx512ifma,avx512vl")]
193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
194#[cfg_attr(test, assert_instr(vpmadd52luq))]
195pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
196 unsafe { vpmadd52luq_256(z:a, x:b, y:c) }
197}
198
199/// Multiply packed unsigned 52-bit integers in each 64-bit element of
200/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
201/// unsigned integer from the intermediate result with the
202/// corresponding unsigned 64-bit integer in `a`, and store the
203/// results in `dst` using writemask `k` (elements are copied
204/// from `k` when the corresponding mask bit is not set).
205///
206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64)
207#[inline]
208#[target_feature(enable = "avx512ifma,avx512vl")]
209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
210#[cfg_attr(test, assert_instr(vpmadd52luq))]
211pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
212 unsafe { simd_select_bitmask(m:k, yes:vpmadd52luq_256(a, b, c), no:a) }
213}
214
215/// Multiply packed unsigned 52-bit integers in each 64-bit element of
216/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
217/// unsigned integer from the intermediate result with the
218/// corresponding unsigned 64-bit integer in `a`, and store the
219/// results in `dst` using writemask `k` (elements are zeroed
220/// out when the corresponding mask bit is not set).
221///
222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64)
223#[inline]
224#[target_feature(enable = "avx512ifma,avx512vl")]
225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
226#[cfg_attr(test, assert_instr(vpmadd52luq))]
227pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
228 unsafe { simd_select_bitmask(m:k, yes:vpmadd52luq_256(a, b, c), no:_mm256_setzero_si256()) }
229}
230
231/// Multiply packed unsigned 52-bit integers in each 64-bit element of
232/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
233/// unsigned integer from the intermediate result with the
234/// corresponding unsigned 64-bit integer in `a`, and store the
235/// results in `dst`.
236///
237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
238#[inline]
239#[target_feature(enable = "avxifma")]
240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241#[cfg_attr(
242 all(test, any(target_os = "linux", target_env = "msvc")),
243 assert_instr(vpmadd52huq)
244)]
245pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
246 unsafe { vpmadd52huq_128(z:a, x:b, y:c) }
247}
248
249/// Multiply packed unsigned 52-bit integers in each 64-bit element of
250/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
251/// unsigned integer from the intermediate result with the
252/// corresponding unsigned 64-bit integer in `a`, and store the
253/// results in `dst`.
254///
255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64)
256#[inline]
257#[target_feature(enable = "avx512ifma,avx512vl")]
258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
259#[cfg_attr(test, assert_instr(vpmadd52huq))]
260pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
261 unsafe { vpmadd52huq_128(z:a, x:b, y:c) }
262}
263
264/// Multiply packed unsigned 52-bit integers in each 64-bit element of
265/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
266/// unsigned integer from the intermediate result with the
267/// corresponding unsigned 64-bit integer in `a`, and store the
268/// results in `dst` using writemask `k` (elements are copied
269/// from `k` when the corresponding mask bit is not set).
270///
271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64)
272#[inline]
273#[target_feature(enable = "avx512ifma,avx512vl")]
274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
275#[cfg_attr(test, assert_instr(vpmadd52huq))]
276pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
277 unsafe { simd_select_bitmask(m:k, yes:vpmadd52huq_128(a, b, c), no:a) }
278}
279
280/// Multiply packed unsigned 52-bit integers in each 64-bit element of
281/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
282/// unsigned integer from the intermediate result with the
283/// corresponding unsigned 64-bit integer in `a`, and store the
284/// results in `dst` using writemask `k` (elements are zeroed
285/// out when the corresponding mask bit is not set).
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64)
288#[inline]
289#[target_feature(enable = "avx512ifma,avx512vl")]
290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
291#[cfg_attr(test, assert_instr(vpmadd52huq))]
292pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
293 unsafe { simd_select_bitmask(m:k, yes:vpmadd52huq_128(a, b, c), no:_mm_setzero_si128()) }
294}
295
296/// Multiply packed unsigned 52-bit integers in each 64-bit element of
297/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
298/// unsigned integer from the intermediate result with the
299/// corresponding unsigned 64-bit integer in `a`, and store the
300/// results in `dst`.
301///
302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
303#[inline]
304#[target_feature(enable = "avxifma")]
305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
306#[cfg_attr(
307 all(test, any(target_os = "linux", target_env = "msvc")),
308 assert_instr(vpmadd52luq)
309)]
310pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
311 unsafe { vpmadd52luq_128(z:a, x:b, y:c) }
312}
313
314/// Multiply packed unsigned 52-bit integers in each 64-bit element of
315/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
316/// unsigned integer from the intermediate result with the
317/// corresponding unsigned 64-bit integer in `a`, and store the
318/// results in `dst`.
319///
320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64)
321#[inline]
322#[target_feature(enable = "avx512ifma,avx512vl")]
323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
324#[cfg_attr(test, assert_instr(vpmadd52luq))]
325pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
326 unsafe { vpmadd52luq_128(z:a, x:b, y:c) }
327}
328
329/// Multiply packed unsigned 52-bit integers in each 64-bit element of
330/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
331/// unsigned integer from the intermediate result with the
332/// corresponding unsigned 64-bit integer in `a`, and store the
333/// results in `dst` using writemask `k` (elements are copied
334/// from `k` when the corresponding mask bit is not set).
335///
336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64)
337#[inline]
338#[target_feature(enable = "avx512ifma,avx512vl")]
339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
340#[cfg_attr(test, assert_instr(vpmadd52luq))]
341pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
342 unsafe { simd_select_bitmask(m:k, yes:vpmadd52luq_128(a, b, c), no:a) }
343}
344
345/// Multiply packed unsigned 52-bit integers in each 64-bit element of
346/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
347/// unsigned integer from the intermediate result with the
348/// corresponding unsigned 64-bit integer in `a`, and store the
349/// results in `dst` using writemask `k` (elements are zeroed
350/// out when the corresponding mask bit is not set).
351///
352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64)
353#[inline]
354#[target_feature(enable = "avx512ifma,avx512vl")]
355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
356#[cfg_attr(test, assert_instr(vpmadd52luq))]
357pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
358 unsafe { simd_select_bitmask(m:k, yes:vpmadd52luq_128(a, b, c), no:_mm_setzero_si128()) }
359}
360
361#[allow(improper_ctypes)]
362unsafe extern "C" {
363 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
364 unsafefn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
365 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
366 unsafefn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
367 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
368 unsafefn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
369 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
370 unsafefn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
371 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
372 unsafefn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
373 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
374 unsafefn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
375}
376
// Runtime tests for the IFMA52 intrinsics. Every test uses the same three
// operands so the expected lane values can be derived by hand:
//   a = 10 << 40, b = (11 << 40) + 4, c = (12 << 40) + 3.
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Writemask shared by all mask/maskz tests; mixes set and clear bits so
    // both the computed and the fallback lanes are exercised.
    const K: __mmask8 = 0b01101101;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }
}
706