use crate::core_arch::x86::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512IFMA52&expand=3488)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    vpmadd52huq_512(a, b, c)
}
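
// Illustrative per-lane sketch of the high-half semantics described above
// (hypothetical scalar code, not the actual implementation, which lowers to
// a single `vpmadd52huq`):
//
//     let mask52 = (1u64 << 52) - 1;
//     let prod = u128::from(b & mask52) * u128::from(c & mask52);
//     let dst = a.wrapping_add((prod >> 52) as u64);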

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3497&avx512techs=AVX512IFMA52)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    vpmadd52luq_512(a, b, c)
}
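
// Illustrative per-lane sketch of the low-half semantics described above
// (hypothetical scalar code, not the actual implementation, which lowers to
// a single `vpmadd52luq`):
//
//     let mask52 = (1u64 << 52) - 1;
//     let prod = u128::from(b & mask52) * u128::from(c & mask52);
//     let dst = a.wrapping_add((prod as u64) & mask52);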

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    vpmadd52huq_256(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    vpmadd52luq_256(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3482&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    vpmadd52huq_128(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3491&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    vpmadd52luq_128(a, b, c)
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
    fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
    fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
    fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
    fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
    fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
    fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let mut a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        a = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
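        // Both multiplicands fit in 52 bits, so the product is
        // (132 << 80) + (81 << 40) + 12, whose high 52 bits are 132 << 28;
        // (10 << 40) + (132 << 28) = 11030549757952.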
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(a, expected);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let mut a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        a = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
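        // The low 52 bits of the product are (81 << 40) + 12, so
        // (10 << 40) + (81 << 40) + 12 = 100055558127628.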
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let mut a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        a = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let mut a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        a = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let mut a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        a = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(a, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let mut a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        a = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(a, expected);
    }
}