use crate::core_arch::x86::*;
use crate::intrinsics::simd::simd_select_bitmask;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { vpmadd52huq_512(a, b, c) }
}
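// A minimal usage sketch for the unmasked form (illustrative only; it assumes
// the caller has already verified CPU support, e.g. via
// `is_x86_feature_detected!("avx512ifma")`):
//
//     let a = _mm512_set1_epi64(1);
//     let b = _mm512_set1_epi64(1 << 51);
//     let c = _mm512_set1_epi64(4);
//     // Each lane computes 1 + ((2^51 * 4) >> 52) = 1 + 2 = 3.
//     let r = _mm512_madd52hi_epu64(a, b, c);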

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
}
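// Shared writemask semantics of the `mask`/`maskz` variants, as a scalar
// sketch (illustrative pseudocode; `madd52hi` stands in for the per-lane
// operation and is not a real function here):
//
//     for i in 0..8 {
//         dst[i] = if (k >> i) & 1 == 1 {
//             madd52hi(a[i], b[i], c[i])
//         } else {
//             a[i] // the `maskz` variant writes 0 here instead
//         };
//     }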

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { vpmadd52luq_512(a, b, c) }
}
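// The `lo` forms keep the low 52 bits of the 104-bit product rather than the
// high 52 bits. A tiny worked example (illustrative): with a = 1, b = 3 and
// c = 5 in every lane, the product is 15, its low 52 bits are 15, and each
// lane of the result is 1 + 15 = 16.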

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
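///
/// This is the VEX-encoded AVX-IFMA form of the operation; it requires only
/// the `avxifma` target feature rather than AVX-512.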
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { vpmadd52huq_256(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { vpmadd52huq_256(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
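///
/// This is the VEX-encoded AVX-IFMA form of the operation; it requires only
/// the `avxifma` target feature rather than AVX-512.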
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { vpmadd52luq_256(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { vpmadd52luq_256(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
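///
/// This is the VEX-encoded AVX-IFMA form of the operation; it requires only
/// the `avxifma` target feature rather than AVX-512.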
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { vpmadd52huq_128(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { vpmadd52huq_128(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
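///
/// This is the VEX-encoded AVX-IFMA form of the operation; it requires only
/// the `avxifma` target feature rather than AVX-512.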
#[inline]
#[target_feature(enable = "avxifma")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { vpmadd52luq_128(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { vpmadd52luq_128(a, b, c) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
    unsafe fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
    unsafe fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
    unsafe fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
    unsafe fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
    unsafe fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
    unsafe fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    const K: __mmask8 = 0b01101101;
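
    // A scalar reference model of the per-lane madd52 semantics (a sketch used
    // to sanity-check the expected constants in the tests below; `hi` selects
    // the high 52 bits of the 104-bit product, otherwise the low 52 bits).
    fn madd52_scalar(a: u64, b: u64, c: u64, hi: bool) -> u64 {
        const MASK52: u128 = (1 << 52) - 1;
        let prod = (b as u128 & MASK52) * (c as u128 & MASK52);
        let part = if hi { prod >> 52 } else { prod & MASK52 };
        a.wrapping_add(part as u64)
    }

    #[test]
    fn test_madd52_scalar_model() {
        let (a, b, c) = (10u64 << 40, (11u64 << 40) + 4, (12u64 << 40) + 3);
        assert_eq!(madd52_scalar(a, b, c, true), 11030549757952);
        assert_eq!(madd52_scalar(a, b, c, false), 100055558127628);
    }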

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }
}