use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

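// Illustrative sketch (not part of the original source): a scalar model of the
// per-lane arithmetic behind the `vpdpwssd` intrinsics in this group. Each
// 32-bit lane of `a` and `b` holds two signed 16-bit values; the two widened
// products are accumulated into `src` with wrapping (non-saturating) adds.
#[cfg(test)]
#[allow(dead_code)]
fn dpwssd_lane_model(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
    let p0 = i32::from(a[0]) * i32::from(b[0]);
    let p1 = i32::from(a[1]) * i32::from(b[1]);
    src.wrapping_add(p0).wrapping_add(p1)
}
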
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwssd)
)]
pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwssd)
)]
pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

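// Illustrative sketch (not part of the original source): how the `mask_` and
// `maskz_` variants above select each 32-bit lane, modelled on scalars. Lane
// `i` keeps the freshly computed dot-product result when bit `i` of `k` is
// set; otherwise the `mask_` form keeps the lane from `src` and the `maskz_`
// form writes zero.
#[cfg(test)]
#[allow(dead_code)]
fn masked_lane_model(k: u16, lane: u32, computed: i32, src: i32, zero_mask: bool) -> i32 {
    if (k >> lane) & 1 != 0 {
        computed
    } else if zero_mask {
        0
    } else {
        src
    }
}
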
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwssds)
)]
pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwssds)
)]
pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

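// Illustrative sketch (not part of the original source): the saturating
// counterpart for the `vpdpwssds` intrinsics above. The full-precision sum of
// `src` and the two 16-bit products is clamped to the i32 range rather than
// wrapped.
#[cfg(test)]
#[allow(dead_code)]
fn dpwssds_lane_model(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
    let sum = i64::from(a[0]) * i64::from(b[0])
        + i64::from(a[1]) * i64::from(b[1])
        + i64::from(src);
    sum.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
}
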
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbusd)
)]
pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbusd)
)]
pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

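// Illustrative sketch (not part of the original source): a scalar model of the
// per-lane arithmetic behind the `vpdpbusd` intrinsics above. Each 32-bit lane
// of `a` supplies four unsigned bytes and each lane of `b` four signed bytes;
// the widened products are accumulated into `src` with wrapping arithmetic.
#[cfg(test)]
#[allow(dead_code)]
fn dpbusd_lane_model(src: i32, a: [u8; 4], b: [i8; 4]) -> i32 {
    let mut acc = src;
    for i in 0..4 {
        acc = acc.wrapping_add(i32::from(a[i]) * i32::from(b[i]));
    }
    acc
}
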
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbusds)
)]
pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695)
#[inline]
#[target_feature(enable = "avxvnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbusds)
)]
pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

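// Illustrative sketch (not part of the original source): the saturating
// counterpart for the `vpdpbusds` intrinsics above; the full-precision sum of
// `src` and the four byte products is clamped to the i32 range instead of
// wrapping.
#[cfg(test)]
#[allow(dead_code)]
fn dpbusds_lane_model(src: i32, a: [u8; 4], b: [i8; 4]) -> i32 {
    let mut sum = i64::from(src);
    for i in 0..4 {
        sum += i64::from(a[i]) * i64::from(b[i]);
    }
    sum.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
}
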
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbssd)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbssd)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbssds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbssds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbsud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbsud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbsuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbsuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711)
#[inline]
#[target_feature(enable = "avxvnniint8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpbuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

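// Illustrative sketch (not part of the original source): the AVX-VNNI-INT8
// intrinsics above differ only in which input is treated as signed (ss, su,
// uu) and in whether the final accumulation saturates. A generic scalar model
// over one 32-bit lane, taking the four byte values of `a` and `b` already
// widened to i32 with the signedness the chosen intrinsic assumes:
#[cfg(test)]
#[allow(dead_code)]
fn dp_bytes_lane_model(src: i32, a: [i32; 4], b: [i32; 4], saturate: bool) -> i32 {
    let mut sum = i64::from(src);
    for i in 0..4 {
        sum += i64::from(a[i]) * i64::from(b[i]);
    }
    if saturate {
        sum.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
    } else {
        // The non-saturating forms wrap at 32 bits.
        sum as i32
    }
}
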
779/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
780/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
781/// 32-bit integer in src, and store the packed 32-bit results in dst.
782///
783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738)
784#[inline]
785#[target_feature(enable = "avxvnniint16")]
786#[cfg_attr(
787 all(test, any(target_os = "linux", target_env = "msvc")),
788 assert_instr(vpdpwsud)
789)]
790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
791pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
792 unsafe { transmute(src:vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
793}
794
795/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
796/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
797/// 32-bit integer in src, and store the packed 32-bit results in dst.
798///
799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739)
800#[inline]
801#[target_feature(enable = "avxvnniint16")]
802#[cfg_attr(
803 all(test, any(target_os = "linux", target_env = "msvc")),
804 assert_instr(vpdpwsud)
805)]
806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
807pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
808 unsafe { transmute(src:vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
809}
810
811/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
812/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
813/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740)
816#[inline]
817#[target_feature(enable = "avxvnniint16")]
818#[cfg_attr(
819 all(test, any(target_os = "linux", target_env = "msvc")),
820 assert_instr(vpdpwsuds)
821)]
822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
823pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
824 unsafe { transmute(src:vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
825}
826
827/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
828/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
829/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
830///
831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741)
832#[inline]
833#[target_feature(enable = "avxvnniint16")]
834#[cfg_attr(
835 all(test, any(target_os = "linux", target_env = "msvc")),
836 assert_instr(vpdpwsuds)
837)]
838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
839pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
840 unsafe { transmute(src:vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
841}
842
843/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
844/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
845/// 32-bit integer in src, and store the packed 32-bit results in dst.
846///
847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742)
848#[inline]
849#[target_feature(enable = "avxvnniint16")]
850#[cfg_attr(
851 all(test, any(target_os = "linux", target_env = "msvc")),
852 assert_instr(vpdpwusd)
853)]
854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
855pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
856 unsafe { transmute(src:vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
857}
858
859/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
860/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
861/// 32-bit integer in src, and store the packed 32-bit results in dst.
862///
863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743)
864#[inline]
865#[target_feature(enable = "avxvnniint16")]
866#[cfg_attr(
867 all(test, any(target_os = "linux", target_env = "msvc")),
868 assert_instr(vpdpwusd)
869)]
870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
871pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
872 unsafe { transmute(src:vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
873}
874
875/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
876/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
877/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
878///
879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744)
880#[inline]
881#[target_feature(enable = "avxvnniint16")]
882#[cfg_attr(
883 all(test, any(target_os = "linux", target_env = "msvc")),
884 assert_instr(vpdpwusds)
885)]
886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
887pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
888 unsafe { transmute(src:vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
889}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwusds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
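
// In the `dpwuud` forms above both operands are unsigned, so an illustrative scalar model
// (hypothetical helper, not part of this module) zero-extends both halves before multiplying:
//
//     fn dpwuud_lane_ref(src: i32, a: u32, b: u32) -> i32 {
//         let p0 = ((a & 0xFFFF) * (b & 0xFFFF)) as i32; // low-word product, bit-cast to i32
//         let p1 = ((a >> 16) * (b >> 16)) as i32; // high-word product, bit-cast to i32
//         src.wrapping_add(p0).wrapping_add(p1)
//     }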

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}

/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749)
#[inline]
#[target_feature(enable = "avxvnniint16")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpdpwuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}

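// The declarations below bind to LLVM's x86 VNNI intrinsics via `#[link_name]`; calls to them
// are lowered by the compiler to the corresponding `vpdp*` dot-product instructions.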
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
    unsafe fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
    unsafe fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
    unsafe fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
    unsafe fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
    unsafe fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
    unsafe fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
    unsafe fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
    unsafe fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
    unsafe fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
    unsafe fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
    unsafe fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
    unsafe fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
    unsafe fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
    unsafe fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
    unsafe fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
    unsafe fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
    unsafe fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
    unsafe fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
    unsafe fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
    unsafe fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
    unsafe fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
    unsafe fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
    unsafe fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
    unsafe fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
    unsafe fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
    unsafe fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
    unsafe fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
    unsafe fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
    unsafe fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
    unsafe fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
    unsafe fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
    unsafe fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
    unsafe fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
    unsafe fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
    unsafe fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
    unsafe fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
}

#[cfg(test)]
mod tests {

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
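        // Every 16-bit lane of a and b is 1, so each 32-bit lane is 1 * 1 + 1 * 1 + src (1) = 3.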
        let r = _mm512_dpwssd_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
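        // With the mask all clear the result is just src; with all bits set every lane is recomputed.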
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
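        // A zero mask zeroes every lane; a full mask computes the dot product in every lane.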
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssd_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssd_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssds_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssds_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
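        // Every byte lane of a and b is 1, so each 32-bit lane adds four 1 * 1 products to src (1) = 5.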
        let r = _mm512_dpbusd_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusd_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusd_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusds_avx_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_avx_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusds_avx_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_avx_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbssd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbssds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbsud_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbsud_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbsuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbuud_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbuud_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbuuds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbuuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsud_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwsud_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwsuds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwsuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwusd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwusds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuud_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwuud_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuud_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwuud_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuuds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwuuds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuuds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwuuds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }
}