1use crate::core_arch::x86::*;
2
3#[cfg(test)]
4use stdarch_test::assert_instr;
5
6/// Multiply packed unsigned 52-bit integers in each 64-bit element of
7/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
8/// unsigned integer from the intermediate result with the
9/// corresponding unsigned 64-bit integer in `a`, and store the
10/// results in `dst`.
11///
12/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512IFMA52&expand=3488)
13#[inline]
14#[target_feature(enable = "avx512ifma")]
15#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16#[cfg_attr(test, assert_instr(vpmadd52huq))]
17pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
18 vpmadd52huq_512(z:a, x:b, y:c)
19}
20
21/// Multiply packed unsigned 52-bit integers in each 64-bit element of
22/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
23/// unsigned integer from the intermediate result with the
24/// corresponding unsigned 64-bit integer in `a`, and store the
25/// results in `dst`.
26///
27/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3497&avx512techs=AVX512IFMA52)
28#[inline]
29#[target_feature(enable = "avx512ifma")]
30#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31#[cfg_attr(test, assert_instr(vpmadd52luq))]
32pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
33 vpmadd52luq_512(z:a, x:b, y:c)
34}
35
36/// Multiply packed unsigned 52-bit integers in each 64-bit element of
37/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
38/// unsigned integer from the intermediate result with the
39/// corresponding unsigned 64-bit integer in `a`, and store the
40/// results in `dst`.
41///
42/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485)
43#[inline]
44#[target_feature(enable = "avx512ifma,avx512vl")]
45#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
46#[cfg_attr(test, assert_instr(vpmadd52huq))]
47pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
48 vpmadd52huq_256(z:a, x:b, y:c)
49}
50
51/// Multiply packed unsigned 52-bit integers in each 64-bit element of
52/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
53/// unsigned integer from the intermediate result with the
54/// corresponding unsigned 64-bit integer in `a`, and store the
55/// results in `dst`.
56///
57/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494)
58#[inline]
59#[target_feature(enable = "avx512ifma,avx512vl")]
60#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
61#[cfg_attr(test, assert_instr(vpmadd52luq))]
62pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
63 vpmadd52luq_256(z:a, x:b, y:c)
64}
65
66/// Multiply packed unsigned 52-bit integers in each 64-bit element of
67/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
68/// unsigned integer from the intermediate result with the
69/// corresponding unsigned 64-bit integer in `a`, and store the
70/// results in `dst`.
71///
72/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3482&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
73#[inline]
74#[target_feature(enable = "avx512ifma,avx512vl")]
75#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
76#[cfg_attr(test, assert_instr(vpmadd52huq))]
77pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
78 vpmadd52huq_128(z:a, x:b, y:c)
79}
80
81/// Multiply packed unsigned 52-bit integers in each 64-bit element of
82/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
83/// unsigned integer from the intermediate result with the
84/// corresponding unsigned 64-bit integer in `a`, and store the
85/// results in `dst`.
86///
87/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3491&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
88#[inline]
89#[target_feature(enable = "avx512ifma,avx512vl")]
90#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
91#[cfg_attr(test, assert_instr(vpmadd52luq))]
92pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
93 vpmadd52luq_128(z:a, x:b, y:c)
94}
95
96#[allow(improper_ctypes)]
97extern "C" {
98 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
99 fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
100 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
101 fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
102 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
103 fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
104 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
105 fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
106 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
107 fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
108 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
109 fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
110}
111
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Every test uses the same operands in all lanes:
    //   a = 10 << 40, b = (11 << 40) + 4, c = (12 << 40) + 3
    // The 104-bit product b * c is (132 << 80) + (81 << 40) + 12, so
    //   high 52 bits = 132 << 28, low 52 bits = (81 << 40) + 12.

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(actual, expected);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(actual, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(actual, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(actual, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(actual, expected);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        // Exercise the low-half intrinsic; this test previously called the
        // high-half variant and so never covered `_mm_madd52lo_epu64`.
        let actual = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(actual, expected);
    }
}
203