1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
8///
9/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
10#[inline]
11#[target_feature(enable = "avx512vnni")]
12#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13#[cfg_attr(test, assert_instr(vpdpwssd))]
14pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
15 unsafe { transmute(src:vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
16}
17
18/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
21#[inline]
22#[target_feature(enable = "avx512vnni")]
23#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24#[cfg_attr(test, assert_instr(vpdpwssd))]
25pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26 unsafe {
27 let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
28 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
29 }
30}
31
32/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
33///
34/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
35#[inline]
36#[target_feature(enable = "avx512vnni")]
37#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38#[cfg_attr(test, assert_instr(vpdpwssd))]
39pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
40 unsafe {
41 let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
42 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
43 }
44}
45
46/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
47///
48/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713)
49#[inline]
50#[target_feature(enable = "avxvnni")]
51#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
52#[cfg_attr(test, assert_instr(vpdpwssd))]
53pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
54 unsafe { transmute(src:vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
55}
56
57/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
58///
59/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
60#[inline]
61#[target_feature(enable = "avx512vnni,avx512vl")]
62#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
63#[cfg_attr(test, assert_instr(vpdpwssd))]
64pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
65 unsafe { transmute(src:vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
66}
67
68/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
69///
70/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
71#[inline]
72#[target_feature(enable = "avx512vnni,avx512vl")]
73#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
74#[cfg_attr(test, assert_instr(vpdpwssd))]
75pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
76 unsafe {
77 let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
78 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
79 }
80}
81
82/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
83///
84/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
85#[inline]
86#[target_feature(enable = "avx512vnni,avx512vl")]
87#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
88#[cfg_attr(test, assert_instr(vpdpwssd))]
89pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
90 unsafe {
91 let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
92 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
93 }
94}
95
96/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
97///
98/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712)
99#[inline]
100#[target_feature(enable = "avxvnni")]
101#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
102#[cfg_attr(test, assert_instr(vpdpwssd))]
103pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
104 unsafe { transmute(src:vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
105}
106
107/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
108///
109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
110#[inline]
111#[target_feature(enable = "avx512vnni,avx512vl")]
112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
113#[cfg_attr(test, assert_instr(vpdpwssd))]
114pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
115 unsafe { transmute(src:vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
116}
117
118/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
119///
120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
121#[inline]
122#[target_feature(enable = "avx512vnni,avx512vl")]
123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
124#[cfg_attr(test, assert_instr(vpdpwssd))]
125pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
126 unsafe {
127 let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
128 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
129 }
130}
131
132/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
133///
134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
135#[inline]
136#[target_feature(enable = "avx512vnni,avx512vl")]
137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
138#[cfg_attr(test, assert_instr(vpdpwssd))]
139pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
140 unsafe {
141 let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
142 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
143 }
144}
145
146/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
147///
148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
149#[inline]
150#[target_feature(enable = "avx512vnni")]
151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
152#[cfg_attr(test, assert_instr(vpdpwssds))]
153pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
154 unsafe { transmute(src:vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
155}
156
157/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
158///
159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
160#[inline]
161#[target_feature(enable = "avx512vnni")]
162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
163#[cfg_attr(test, assert_instr(vpdpwssds))]
164pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
165 unsafe {
166 let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
167 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
168 }
169}
170
171/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
172///
173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
174#[inline]
175#[target_feature(enable = "avx512vnni")]
176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
177#[cfg_attr(test, assert_instr(vpdpwssds))]
178pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
179 unsafe {
180 let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
181 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
182 }
183}
184
185/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726)
188#[inline]
189#[target_feature(enable = "avxvnni")]
190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
191#[cfg_attr(test, assert_instr(vpdpwssds))]
192pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
193 unsafe { transmute(src:vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
194}
195
196/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
197///
198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
199#[inline]
200#[target_feature(enable = "avx512vnni,avx512vl")]
201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
202#[cfg_attr(test, assert_instr(vpdpwssds))]
203pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
204 unsafe { transmute(src:vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
205}
206
207/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
208///
209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
210#[inline]
211#[target_feature(enable = "avx512vnni,avx512vl")]
212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
213#[cfg_attr(test, assert_instr(vpdpwssds))]
214pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
215 unsafe {
216 let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
217 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
218 }
219}
220
221/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
222///
223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
224#[inline]
225#[target_feature(enable = "avx512vnni,avx512vl")]
226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
227#[cfg_attr(test, assert_instr(vpdpwssds))]
228pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
229 unsafe {
230 let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
231 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
232 }
233}
234
235/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
236///
237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725)
238#[inline]
239#[target_feature(enable = "avxvnni")]
240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
241#[cfg_attr(test, assert_instr(vpdpwssds))]
242pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
243 unsafe { transmute(src:vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
244}
245
246/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
247///
248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
249#[inline]
250#[target_feature(enable = "avx512vnni,avx512vl")]
251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
252#[cfg_attr(test, assert_instr(vpdpwssds))]
253pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
254 unsafe { transmute(src:vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
255}
256
257/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
258///
259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
260#[inline]
261#[target_feature(enable = "avx512vnni,avx512vl")]
262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
263#[cfg_attr(test, assert_instr(vpdpwssds))]
264pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
265 unsafe {
266 let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
267 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
268 }
269}
270
271/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
272///
273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
274#[inline]
275#[target_feature(enable = "avx512vnni,avx512vl")]
276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
277#[cfg_attr(test, assert_instr(vpdpwssds))]
278pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
279 unsafe {
280 let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
281 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
282 }
283}
284
285/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
288#[inline]
289#[target_feature(enable = "avx512vnni")]
290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
291#[cfg_attr(test, assert_instr(vpdpbusd))]
292pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
293 unsafe { transmute(src:vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
294}
295
296/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
297///
298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
299#[inline]
300#[target_feature(enable = "avx512vnni")]
301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
302#[cfg_attr(test, assert_instr(vpdpbusd))]
303pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
304 unsafe {
305 let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
306 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
307 }
308}
309
310/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
311///
312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
313#[inline]
314#[target_feature(enable = "avx512vnni")]
315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
316#[cfg_attr(test, assert_instr(vpdpbusd))]
317pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
318 unsafe {
319 let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
320 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
321 }
322}
323
324/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683)
327#[inline]
328#[target_feature(enable = "avxvnni")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vpdpbusd))]
331pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
332 unsafe { transmute(src:vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
333}
334
335/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
336///
337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
338#[inline]
339#[target_feature(enable = "avx512vnni,avx512vl")]
340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
341#[cfg_attr(test, assert_instr(vpdpbusd))]
342pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
343 unsafe { transmute(src:vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
344}
345
346/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
347///
348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
349#[inline]
350#[target_feature(enable = "avx512vnni,avx512vl")]
351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
352#[cfg_attr(test, assert_instr(vpdpbusd))]
353pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
354 unsafe {
355 let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
356 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
357 }
358}
359
360/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
361///
362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
363#[inline]
364#[target_feature(enable = "avx512vnni,avx512vl")]
365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
366#[cfg_attr(test, assert_instr(vpdpbusd))]
367pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
368 unsafe {
369 let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
370 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
371 }
372}
373
374/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
375///
376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682)
377#[inline]
378#[target_feature(enable = "avxvnni")]
379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
380#[cfg_attr(test, assert_instr(vpdpbusd))]
381pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
382 unsafe { transmute(src:vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
383}
384
385/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
386///
387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
388#[inline]
389#[target_feature(enable = "avx512vnni,avx512vl")]
390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
391#[cfg_attr(test, assert_instr(vpdpbusd))]
392pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
393 unsafe { transmute(src:vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
394}
395
396/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
397///
398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
399#[inline]
400#[target_feature(enable = "avx512vnni,avx512vl")]
401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
402#[cfg_attr(test, assert_instr(vpdpbusd))]
403pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
404 unsafe {
405 let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
406 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
407 }
408}
409
410/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
411///
412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
413#[inline]
414#[target_feature(enable = "avx512vnni,avx512vl")]
415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
416#[cfg_attr(test, assert_instr(vpdpbusd))]
417pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
418 unsafe {
419 let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
420 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
421 }
422}
423
424/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
425///
426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
427#[inline]
428#[target_feature(enable = "avx512vnni")]
429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
430#[cfg_attr(test, assert_instr(vpdpbusds))]
431pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
432 unsafe { transmute(src:vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
433}
434
435/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
436///
437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
438#[inline]
439#[target_feature(enable = "avx512vnni")]
440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
441#[cfg_attr(test, assert_instr(vpdpbusds))]
442pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
443 unsafe {
444 let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
445 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
446 }
447}
448
449/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
450///
451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
452#[inline]
453#[target_feature(enable = "avx512vnni")]
454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
455#[cfg_attr(test, assert_instr(vpdpbusds))]
456pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
457 unsafe {
458 let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
459 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
460 }
461}
462
463/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
464///
465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696)
466#[inline]
467#[target_feature(enable = "avxvnni")]
468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
469#[cfg_attr(test, assert_instr(vpdpbusds))]
470pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
471 unsafe { transmute(src:vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
472}
473
474/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
475///
476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
477#[inline]
478#[target_feature(enable = "avx512vnni,avx512vl")]
479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
480#[cfg_attr(test, assert_instr(vpdpbusds))]
481pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
482 unsafe { transmute(src:vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
483}
484
485/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
486///
487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
488#[inline]
489#[target_feature(enable = "avx512vnni,avx512vl")]
490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
491#[cfg_attr(test, assert_instr(vpdpbusds))]
492pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
493 unsafe {
494 let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
495 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
496 }
497}
498
499/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
500///
501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
502#[inline]
503#[target_feature(enable = "avx512vnni,avx512vl")]
504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
505#[cfg_attr(test, assert_instr(vpdpbusds))]
506pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
507 unsafe {
508 let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
509 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
510 }
511}
512
513/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
514///
515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695)
516#[inline]
517#[target_feature(enable = "avxvnni")]
518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
519#[cfg_attr(test, assert_instr(vpdpbusds))]
520pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
521 unsafe { transmute(src:vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
522}
523
524/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
525///
526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
527#[inline]
528#[target_feature(enable = "avx512vnni,avx512vl")]
529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
530#[cfg_attr(test, assert_instr(vpdpbusds))]
531pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
532 unsafe { transmute(src:vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
533}
534
535/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
536///
537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
538#[inline]
539#[target_feature(enable = "avx512vnni,avx512vl")]
540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
541#[cfg_attr(test, assert_instr(vpdpbusds))]
542pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
543 unsafe {
544 let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
545 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
546 }
547}
548
549/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
550///
551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
552#[inline]
553#[target_feature(enable = "avx512vnni,avx512vl")]
554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
555#[cfg_attr(test, assert_instr(vpdpbusds))]
556pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
557 unsafe {
558 let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
559 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
560 }
561}
562
563/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
564/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
565/// 32-bit integer in src, and store the packed 32-bit results in dst.
566///
567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674)
568#[inline]
569#[target_feature(enable = "avxvnniint8")]
570#[cfg_attr(test, assert_instr(vpdpbssd))]
571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
572pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
573 unsafe { transmute(src:vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
574}
575
576/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
577/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
578/// 32-bit integer in src, and store the packed 32-bit results in dst.
579///
580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675)
581#[inline]
582#[target_feature(enable = "avxvnniint8")]
583#[cfg_attr(test, assert_instr(vpdpbssd))]
584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
585pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
586 unsafe { transmute(src:vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
587}
588
589/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
590/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
591/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
592///
593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676)
594#[inline]
595#[target_feature(enable = "avxvnniint8")]
596#[cfg_attr(test, assert_instr(vpdpbssds))]
597#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
598pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
599 unsafe { transmute(src:vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
600}
601
602/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
603/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
604/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
605///
606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677)
607#[inline]
608#[target_feature(enable = "avxvnniint8")]
609#[cfg_attr(test, assert_instr(vpdpbssds))]
610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
611pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
612 unsafe { transmute(src:vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
613}
614
615/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
616/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
617/// 32-bit integer in src, and store the packed 32-bit results in dst.
618///
619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678)
620#[inline]
621#[target_feature(enable = "avxvnniint8")]
622#[cfg_attr(test, assert_instr(vpdpbsud))]
623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
624pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
625 unsafe { transmute(src:vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
626}
627
628/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
629/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
630/// 32-bit integer in src, and store the packed 32-bit results in dst.
631///
632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679)
633#[inline]
634#[target_feature(enable = "avxvnniint8")]
635#[cfg_attr(test, assert_instr(vpdpbsud))]
636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
637pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
638 unsafe { transmute(src:vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
639}
640
641/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
642/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
643/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
644///
645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680)
646#[inline]
647#[target_feature(enable = "avxvnniint8")]
648#[cfg_attr(test, assert_instr(vpdpbsuds))]
649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
650pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
651 unsafe { transmute(src:vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
652}
653
654/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
655/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
656/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
657///
658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681)
659#[inline]
660#[target_feature(enable = "avxvnniint8")]
661#[cfg_attr(test, assert_instr(vpdpbsuds))]
662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
663pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
664 unsafe { transmute(src:vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
665}
666
667/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
668/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
669/// 32-bit integer in src, and store the packed 32-bit results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708)
672#[inline]
673#[target_feature(enable = "avxvnniint8")]
674#[cfg_attr(test, assert_instr(vpdpbuud))]
675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
676pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
677 unsafe { transmute(src:vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
678}
679
680/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
681/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
682/// 32-bit integer in src, and store the packed 32-bit results in dst.
683///
684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709)
685#[inline]
686#[target_feature(enable = "avxvnniint8")]
687#[cfg_attr(test, assert_instr(vpdpbuud))]
688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
689pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
690 unsafe { transmute(src:vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
691}
692
693/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
694/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
695/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
696///
697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710)
698#[inline]
699#[target_feature(enable = "avxvnniint8")]
700#[cfg_attr(test, assert_instr(vpdpbuuds))]
701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
702pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
703 unsafe { transmute(src:vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
704}
705
706/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
707/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
708/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
709///
710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711)
711#[inline]
712#[target_feature(enable = "avxvnniint8")]
713#[cfg_attr(test, assert_instr(vpdpbuuds))]
714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
715pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
716 unsafe { transmute(src:vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
717}
718
719/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
720/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
721/// 32-bit integer in src, and store the packed 32-bit results in dst.
722///
723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738)
724#[inline]
725#[target_feature(enable = "avxvnniint16")]
726#[cfg_attr(test, assert_instr(vpdpwsud))]
727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
728pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
729 unsafe { transmute(src:vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
730}
731
732/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
733/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
734/// 32-bit integer in src, and store the packed 32-bit results in dst.
735///
736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739)
737#[inline]
738#[target_feature(enable = "avxvnniint16")]
739#[cfg_attr(test, assert_instr(vpdpwsud))]
740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
741pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
742 unsafe { transmute(src:vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
743}
744
745/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
746/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
747/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
748///
749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740)
750#[inline]
751#[target_feature(enable = "avxvnniint16")]
752#[cfg_attr(test, assert_instr(vpdpwsuds))]
753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
754pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
755 unsafe { transmute(src:vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
756}
757
758/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
759/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
760/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
761///
762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741)
763#[inline]
764#[target_feature(enable = "avxvnniint16")]
765#[cfg_attr(test, assert_instr(vpdpwsuds))]
766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
767pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
768 unsafe { transmute(src:vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
769}
770
771/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
772/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
773/// 32-bit integer in src, and store the packed 32-bit results in dst.
774///
775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742)
776#[inline]
777#[target_feature(enable = "avxvnniint16")]
778#[cfg_attr(test, assert_instr(vpdpwusd))]
779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
780pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
781 unsafe { transmute(src:vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
782}
783
784/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
785/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
786/// 32-bit integer in src, and store the packed 32-bit results in dst.
787///
788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743)
789#[inline]
790#[target_feature(enable = "avxvnniint16")]
791#[cfg_attr(test, assert_instr(vpdpwusd))]
792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
793pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
794 unsafe { transmute(src:vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
795}
796
797/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
798/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
799/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
800///
801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744)
802#[inline]
803#[target_feature(enable = "avxvnniint16")]
804#[cfg_attr(test, assert_instr(vpdpwusds))]
805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
806pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
807 unsafe { transmute(src:vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
808}
809
810/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
811/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
812/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
813///
814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745)
815#[inline]
816#[target_feature(enable = "avxvnniint16")]
817#[cfg_attr(test, assert_instr(vpdpwusds))]
818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
819pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
820 unsafe { transmute(src:vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
821}
822
823/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
824/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
825/// 32-bit integer in src, and store the packed 32-bit results in dst.
826///
827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746)
828#[inline]
829#[target_feature(enable = "avxvnniint16")]
830#[cfg_attr(test, assert_instr(vpdpwuud))]
831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
832pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
833 unsafe { transmute(src:vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
834}
835
836/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
837/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
838/// 32-bit integer in src, and store the packed 32-bit results in dst.
839///
840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747)
841#[inline]
842#[target_feature(enable = "avxvnniint16")]
843#[cfg_attr(test, assert_instr(vpdpwuud))]
844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
845pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
846 unsafe { transmute(src:vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
847}
848
849/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
850/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
851/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
852///
853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748)
854#[inline]
855#[target_feature(enable = "avxvnniint16")]
856#[cfg_attr(test, assert_instr(vpdpwuuds))]
857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
858pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
859 unsafe { transmute(src:vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
860}
861
862/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
863/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
864/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
865///
866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749)
867#[inline]
868#[target_feature(enable = "avxvnniint16")]
869#[cfg_attr(test, assert_instr(vpdpwuuds))]
870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
871pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
872 unsafe { transmute(src:vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
873}
874
875#[allow(improper_ctypes)]
876unsafe extern "C" {
877 #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
878 unsafefn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
879 #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
880 unsafefn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
881 #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
882 unsafefn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
883
884 #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
885 unsafefn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
886 #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
887 unsafefn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
888 #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
889 unsafefn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
890
891 #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
892 unsafefn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
893 #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
894 unsafefn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
895 #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
896 unsafefn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
897
898 #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
899 unsafefn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
900 #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
901 unsafefn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
902 #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
903 unsafefn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
904
905 #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
906 unsafefn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
907 #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
908 unsafefn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
909
910 #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
911 unsafefn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
912 #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
913 unsafefn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
914
915 #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
916 unsafefn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
917 #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
918 unsafefn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
919
920 #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
921 unsafefn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
922 #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
923 unsafefn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
924
925 #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
926 unsafefn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
927 #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
928 unsafefn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
929
930 #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
931 unsafefn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
932 #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
933 unsafefn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
934
935 #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
936 unsafefn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
937 #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
938 unsafefn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
939
940 #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
941 unsafefn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
942 #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
943 unsafefn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
944
945 #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
946 unsafefn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
947 #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
948 unsafefn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
949
950 #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
951 unsafefn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
952 #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
953 unsafefn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
954
955 #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
956 unsafefn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
957 #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
958 unsafefn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
959
960 #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
961 unsafefn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
962 #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
963 unsafefn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
964}
965
#[cfg(test)]
mod tests {
    //! Smoke tests for the VNNI dot-product intrinsics.
    //!
    //! Every test seeds each 32-bit accumulator lane with 1 and fills both
    //! multiplicands with sub-elements equal to 1. A lane built from two
    //! 16-bit sub-elements is therefore expected to end up as 1 + 2 = 3, and a
    //! lane built from four 8-bit sub-elements as 1 + 4 = 5.
    //!
    //! The masked tests check the two extremes only: an all-clear mask (the
    //! result must equal `src` for `mask`, or zero for `maskz`) and a mask
    //! with a bit set for every 32-bit lane.

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// One 32-bit lane holding two 16-bit sub-elements, each equal to 1.
    const WORD_ONES: i32 = 0x0001_0001;

    /// One 32-bit lane holding four 8-bit sub-elements, each equal to 1.
    const BYTE_ONES: i32 = 0x0101_0101;

    // ---- dpwssd ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let sum = _mm512_dpwssd_epi32(acc, ones, ones);
        assert_eq_m512i(sum, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let untouched = _mm512_mask_dpwssd_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(untouched, acc);
        let updated = _mm512_mask_dpwssd_epi32(acc, 0xffff, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let cleared = _mm512_maskz_dpwssd_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let updated = _mm512_maskz_dpwssd_epi32(0xffff, acc, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssd_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwssd_avx_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwssd_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let untouched = _mm256_mask_dpwssd_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(untouched, acc);
        let updated = _mm256_mask_dpwssd_epi32(acc, 0xff, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let cleared = _mm256_maskz_dpwssd_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let updated = _mm256_maskz_dpwssd_epi32(0xff, acc, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssd_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwssd_avx_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwssd_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let untouched = _mm_mask_dpwssd_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(untouched, acc);
        // The low four mask bits cover all four 32-bit lanes of a 128-bit vector.
        let updated = _mm_mask_dpwssd_epi32(acc, 0x0f, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let cleared = _mm_maskz_dpwssd_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let updated = _mm_maskz_dpwssd_epi32(0x0f, acc, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(3));
    }

    // ---- dpwssds ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let sum = _mm512_dpwssds_epi32(acc, ones, ones);
        assert_eq_m512i(sum, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let untouched = _mm512_mask_dpwssds_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(untouched, acc);
        let updated = _mm512_mask_dpwssds_epi32(acc, 0xffff, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let cleared = _mm512_maskz_dpwssds_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let updated = _mm512_maskz_dpwssds_epi32(0xffff, acc, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssds_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwssds_avx_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwssds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let untouched = _mm256_mask_dpwssds_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(untouched, acc);
        let updated = _mm256_mask_dpwssds_epi32(acc, 0xff, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let cleared = _mm256_maskz_dpwssds_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let updated = _mm256_maskz_dpwssds_epi32(0xff, acc, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssds_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwssds_avx_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwssds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let untouched = _mm_mask_dpwssds_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(untouched, acc);
        let updated = _mm_mask_dpwssds_epi32(acc, 0x0f, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let cleared = _mm_maskz_dpwssds_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let updated = _mm_maskz_dpwssds_epi32(0x0f, acc, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(3));
    }

    // ---- dpbusd ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let sum = _mm512_dpbusd_epi32(acc, ones, ones);
        assert_eq_m512i(sum, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let untouched = _mm512_mask_dpbusd_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(untouched, acc);
        let updated = _mm512_mask_dpbusd_epi32(acc, 0xffff, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let cleared = _mm512_maskz_dpbusd_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let updated = _mm512_maskz_dpbusd_epi32(0xffff, acc, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusd_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbusd_avx_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbusd_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let untouched = _mm256_mask_dpbusd_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(untouched, acc);
        let updated = _mm256_mask_dpbusd_epi32(acc, 0xff, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let cleared = _mm256_maskz_dpbusd_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let updated = _mm256_maskz_dpbusd_epi32(0xff, acc, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusd_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbusd_avx_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbusd_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let untouched = _mm_mask_dpbusd_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(untouched, acc);
        let updated = _mm_mask_dpbusd_epi32(acc, 0x0f, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let cleared = _mm_maskz_dpbusd_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let updated = _mm_maskz_dpbusd_epi32(0x0f, acc, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(5));
    }

    // ---- dpbusds ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let sum = _mm512_dpbusds_epi32(acc, ones, ones);
        assert_eq_m512i(sum, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let untouched = _mm512_mask_dpbusds_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(untouched, acc);
        let updated = _mm512_mask_dpbusds_epi32(acc, 0xffff, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let cleared = _mm512_maskz_dpbusds_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let updated = _mm512_maskz_dpbusds_epi32(0xffff, acc, ones, ones);
        assert_eq_m512i(updated, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusds_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbusds_avx_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbusds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let untouched = _mm256_mask_dpbusds_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(untouched, acc);
        let updated = _mm256_mask_dpbusds_epi32(acc, 0xff, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let cleared = _mm256_maskz_dpbusds_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let updated = _mm256_maskz_dpbusds_epi32(0xff, acc, ones, ones);
        assert_eq_m256i(updated, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusds_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbusds_avx_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbusds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let untouched = _mm_mask_dpbusds_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(untouched, acc);
        let updated = _mm_mask_dpbusds_epi32(acc, 0x0f, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let cleared = _mm_maskz_dpbusds_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let updated = _mm_maskz_dpbusds_epi32(0x0f, acc, ones, ones);
        assert_eq_m128i(updated, _mm_set1_epi32(5));
    }

    // ---- avxvnniint8: dpbssd / dpbssds / dpbsud / dpbsuds / dpbuud / dpbuuds ----

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbssd_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbssd_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbssds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbssds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbsud_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbsud_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbsuds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbsuds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbuud_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbuud_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let sum = _mm_dpbuuds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let sum = _mm256_dpbuuds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(5));
    }

    // ---- avxvnniint16: dpwsud / dpwsuds / dpwusd / dpwusds / dpwuud / dpwuuds ----

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwsud_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwsud_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwsuds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwsuds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwusd_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwusd_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwusds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwusds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwuud_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwuud_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let sum = _mm_dpwuuds_epi32(acc, ones, ones);
        assert_eq_m128i(sum, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let sum = _mm256_dpwuuds_epi32(acc, ones, ones);
        assert_eq_m256i(sum, _mm256_set1_epi32(3));
    }
}
1700