1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
8///
9/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
10#[inline]
11#[target_feature(enable = "avx512vnni")]
12#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13#[cfg_attr(test, assert_instr(vpdpwssd))]
14pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
15 transmute(src:vpdpwssd(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
16}
17
18/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
21#[inline]
22#[target_feature(enable = "avx512vnni")]
23#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24#[cfg_attr(test, assert_instr(vpdpwssd))]
25pub unsafe fn _mm512_mask_dpwssd_epi32(
26 src: __m512i,
27 k: __mmask16,
28 a: __m512i,
29 b: __m512i,
30) -> __m512i {
31 let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
32 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
33}
34
35/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36///
37/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
38#[inline]
39#[target_feature(enable = "avx512vnni")]
40#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41#[cfg_attr(test, assert_instr(vpdpwssd))]
42pub unsafe fn _mm512_maskz_dpwssd_epi32(
43 k: __mmask16,
44 src: __m512i,
45 a: __m512i,
46 b: __m512i,
47) -> __m512i {
48 let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
49 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
50 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
51}
52
53/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
54///
55/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
56#[inline]
57#[target_feature(enable = "avx512vnni,avx512vl")]
58#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
59#[cfg_attr(test, assert_instr(vpdpwssd))]
60pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
61 transmute(src:vpdpwssd256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
62}
63
64/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
65///
66/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
67#[inline]
68#[target_feature(enable = "avx512vnni,avx512vl")]
69#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
70#[cfg_attr(test, assert_instr(vpdpwssd))]
71pub unsafe fn _mm256_mask_dpwssd_epi32(
72 src: __m256i,
73 k: __mmask8,
74 a: __m256i,
75 b: __m256i,
76) -> __m256i {
77 let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
78 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
79}
80
81/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
82///
83/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
84#[inline]
85#[target_feature(enable = "avx512vnni,avx512vl")]
86#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
87#[cfg_attr(test, assert_instr(vpdpwssd))]
88pub unsafe fn _mm256_maskz_dpwssd_epi32(
89 k: __mmask8,
90 src: __m256i,
91 a: __m256i,
92 b: __m256i,
93) -> __m256i {
94 let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
95 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
96 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
97}
98
99/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
100///
101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
102#[inline]
103#[target_feature(enable = "avx512vnni,avx512vl")]
104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
105#[cfg_attr(test, assert_instr(vpdpwssd))]
106pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
107 transmute(src:vpdpwssd128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
108}
109
110/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
111///
112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
113#[inline]
114#[target_feature(enable = "avx512vnni,avx512vl")]
115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
116#[cfg_attr(test, assert_instr(vpdpwssd))]
117pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
118 let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
119 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
120}
121
122/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
123///
124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
125#[inline]
126#[target_feature(enable = "avx512vnni,avx512vl")]
127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
128#[cfg_attr(test, assert_instr(vpdpwssd))]
129pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
130 let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
131 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
132 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
133}
134
135/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
136///
137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
138#[inline]
139#[target_feature(enable = "avx512vnni")]
140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
141#[cfg_attr(test, assert_instr(vpdpwssds))]
142pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
143 transmute(src:vpdpwssds(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
144}
145
146/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
147///
148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
149#[inline]
150#[target_feature(enable = "avx512vnni")]
151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
152#[cfg_attr(test, assert_instr(vpdpwssds))]
153pub unsafe fn _mm512_mask_dpwssds_epi32(
154 src: __m512i,
155 k: __mmask16,
156 a: __m512i,
157 b: __m512i,
158) -> __m512i {
159 let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
160 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
161}
162
163/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
164///
165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
166#[inline]
167#[target_feature(enable = "avx512vnni")]
168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
169#[cfg_attr(test, assert_instr(vpdpwssds))]
170pub unsafe fn _mm512_maskz_dpwssds_epi32(
171 k: __mmask16,
172 src: __m512i,
173 a: __m512i,
174 b: __m512i,
175) -> __m512i {
176 let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
177 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
178 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
179}
180
181/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
182///
183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
184#[inline]
185#[target_feature(enable = "avx512vnni,avx512vl")]
186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
187#[cfg_attr(test, assert_instr(vpdpwssds))]
188pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
189 transmute(src:vpdpwssds256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
190}
191
192/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
195#[inline]
196#[target_feature(enable = "avx512vnni,avx512vl")]
197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198#[cfg_attr(test, assert_instr(vpdpwssds))]
199pub unsafe fn _mm256_mask_dpwssds_epi32(
200 src: __m256i,
201 k: __mmask8,
202 a: __m256i,
203 b: __m256i,
204) -> __m256i {
205 let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
206 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
207}
208
209/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
210///
211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
212#[inline]
213#[target_feature(enable = "avx512vnni,avx512vl")]
214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
215#[cfg_attr(test, assert_instr(vpdpwssds))]
216pub unsafe fn _mm256_maskz_dpwssds_epi32(
217 k: __mmask8,
218 src: __m256i,
219 a: __m256i,
220 b: __m256i,
221) -> __m256i {
222 let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
223 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
224 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
225}
226
227/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
228///
229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
230#[inline]
231#[target_feature(enable = "avx512vnni,avx512vl")]
232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
233#[cfg_attr(test, assert_instr(vpdpwssds))]
234pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
235 transmute(src:vpdpwssds128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
236}
237
238/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
239///
240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
241#[inline]
242#[target_feature(enable = "avx512vnni,avx512vl")]
243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
244#[cfg_attr(test, assert_instr(vpdpwssds))]
245pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
246 let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
247 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
248}
249
250/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
251///
252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
253#[inline]
254#[target_feature(enable = "avx512vnni,avx512vl")]
255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
256#[cfg_attr(test, assert_instr(vpdpwssds))]
257pub unsafe fn _mm_maskz_dpwssds_epi32(
258 k: __mmask8,
259 src: __m128i,
260 a: __m128i,
261 b: __m128i,
262) -> __m128i {
263 let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
264 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
265 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
266}
267
268/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
269///
270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
271#[inline]
272#[target_feature(enable = "avx512vnni")]
273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
274#[cfg_attr(test, assert_instr(vpdpbusd))]
275pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
276 transmute(src:vpdpbusd(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
277}
278
279/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
280///
281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
282#[inline]
283#[target_feature(enable = "avx512vnni")]
284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
285#[cfg_attr(test, assert_instr(vpdpbusd))]
286pub unsafe fn _mm512_mask_dpbusd_epi32(
287 src: __m512i,
288 k: __mmask16,
289 a: __m512i,
290 b: __m512i,
291) -> __m512i {
292 let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
293 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
294}
295
296/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
297///
298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
299#[inline]
300#[target_feature(enable = "avx512vnni")]
301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
302#[cfg_attr(test, assert_instr(vpdpbusd))]
303pub unsafe fn _mm512_maskz_dpbusd_epi32(
304 k: __mmask16,
305 src: __m512i,
306 a: __m512i,
307 b: __m512i,
308) -> __m512i {
309 let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
310 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
311 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
312}
313
314/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
315///
316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
317#[inline]
318#[target_feature(enable = "avx512vnni,avx512vl")]
319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
320#[cfg_attr(test, assert_instr(vpdpbusd))]
321pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
322 transmute(src:vpdpbusd256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
323}
324
325/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
326///
327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
328#[inline]
329#[target_feature(enable = "avx512vnni,avx512vl")]
330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
331#[cfg_attr(test, assert_instr(vpdpbusd))]
332pub unsafe fn _mm256_mask_dpbusd_epi32(
333 src: __m256i,
334 k: __mmask8,
335 a: __m256i,
336 b: __m256i,
337) -> __m256i {
338 let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
339 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
340}
341
342/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
343///
344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
345#[inline]
346#[target_feature(enable = "avx512vnni,avx512vl")]
347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
348#[cfg_attr(test, assert_instr(vpdpbusd))]
349pub unsafe fn _mm256_maskz_dpbusd_epi32(
350 k: __mmask8,
351 src: __m256i,
352 a: __m256i,
353 b: __m256i,
354) -> __m256i {
355 let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
356 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
357 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
358}
359
360/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
361///
362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
363#[inline]
364#[target_feature(enable = "avx512vnni,avx512vl")]
365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
366#[cfg_attr(test, assert_instr(vpdpbusd))]
367pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
368 transmute(src:vpdpbusd128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
369}
370
371/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
372///
373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
374#[inline]
375#[target_feature(enable = "avx512vnni,avx512vl")]
376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
377#[cfg_attr(test, assert_instr(vpdpbusd))]
378pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
379 let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
380 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
381}
382
383/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
384///
385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
386#[inline]
387#[target_feature(enable = "avx512vnni,avx512vl")]
388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
389#[cfg_attr(test, assert_instr(vpdpbusd))]
390pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
391 let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
392 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
393 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
394}
395
396/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
397///
398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
399#[inline]
400#[target_feature(enable = "avx512vnni")]
401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
402#[cfg_attr(test, assert_instr(vpdpbusds))]
403pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
404 transmute(src:vpdpbusds(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
405}
406
407/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
408///
409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
410#[inline]
411#[target_feature(enable = "avx512vnni")]
412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
413#[cfg_attr(test, assert_instr(vpdpbusds))]
414pub unsafe fn _mm512_mask_dpbusds_epi32(
415 src: __m512i,
416 k: __mmask16,
417 a: __m512i,
418 b: __m512i,
419) -> __m512i {
420 let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
421 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
422}
423
424/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
425///
426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
427#[inline]
428#[target_feature(enable = "avx512vnni")]
429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
430#[cfg_attr(test, assert_instr(vpdpbusds))]
431pub unsafe fn _mm512_maskz_dpbusds_epi32(
432 k: __mmask16,
433 src: __m512i,
434 a: __m512i,
435 b: __m512i,
436) -> __m512i {
437 let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
438 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
439 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
440}
441
442/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
443///
444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
445#[inline]
446#[target_feature(enable = "avx512vnni,avx512vl")]
447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
448#[cfg_attr(test, assert_instr(vpdpbusds))]
449pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
450 transmute(src:vpdpbusds256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
451}
452
453/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
454///
455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
456#[inline]
457#[target_feature(enable = "avx512vnni,avx512vl")]
458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
459#[cfg_attr(test, assert_instr(vpdpbusds))]
460pub unsafe fn _mm256_mask_dpbusds_epi32(
461 src: __m256i,
462 k: __mmask8,
463 a: __m256i,
464 b: __m256i,
465) -> __m256i {
466 let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
467 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
468}
469
470/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
471///
472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
473#[inline]
474#[target_feature(enable = "avx512vnni,avx512vl")]
475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
476#[cfg_attr(test, assert_instr(vpdpbusds))]
477pub unsafe fn _mm256_maskz_dpbusds_epi32(
478 k: __mmask8,
479 src: __m256i,
480 a: __m256i,
481 b: __m256i,
482) -> __m256i {
483 let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
484 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
485 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
486}
487
488/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
489///
490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
491#[inline]
492#[target_feature(enable = "avx512vnni,avx512vl")]
493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
494#[cfg_attr(test, assert_instr(vpdpbusds))]
495pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
496 transmute(src:vpdpbusds128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
497}
498
499/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
500///
501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
502#[inline]
503#[target_feature(enable = "avx512vnni,avx512vl")]
504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
505#[cfg_attr(test, assert_instr(vpdpbusds))]
506pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
507 let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
508 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
509}
510
511/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
512///
513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
514#[inline]
515#[target_feature(enable = "avx512vnni,avx512vl")]
516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
517#[cfg_attr(test, assert_instr(vpdpbusds))]
518pub unsafe fn _mm_maskz_dpbusds_epi32(
519 k: __mmask8,
520 src: __m128i,
521 a: __m128i,
522 b: __m128i,
523) -> __m128i {
524 let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
525 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
526 transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
527}
528
529#[allow(improper_ctypes)]
530extern "C" {
531 #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
532 fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
533 #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
534 fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
535 #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
536 fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
537
538 #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
539 fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
540 #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
541 fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
542 #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
543 fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
544
545 #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
546 fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
547 #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
548 fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
549 #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
550 fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
551
552 #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
553 fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
554 #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
555 fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
556 #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
557 fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
558}
559
// Tests for the VNNI dot-product intrinsics.
//
// Every test uses `src = 1` and operands whose packed sub-lanes are all 1, so
// the expected value per 32-bit element is `1 + 2 = 3` for the word variants
// and `1 + 4 = 5` for the byte variants. Masked variants are exercised with an
// all-clear mask and with a mask that has one set bit per element.
//
// The unmasked saturating (`*ds`) tests additionally start from
// `src = i32::MAX`, an input for which a saturating and a wrapping
// accumulation give different answers.
#[cfg(test)]
mod tests {

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// One 32-bit element holding two 16-bit lanes, each equal to 1.
    const WORD_ONES: i32 = 1 << 16 | 1 << 0;
    /// One 32-bit element holding four 8-bit lanes, each equal to 1.
    const BYTE_ONES: i32 = 1 << 24 | 1 << 16 | 1 << 8 | 1 << 0;

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(WORD_ONES);
        let b = _mm512_set1_epi32(WORD_ONES);
        let r = _mm512_dpwssd_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(WORD_ONES);
        let b = _mm512_set1_epi32(WORD_ONES);
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(WORD_ONES);
        let b = _mm512_set1_epi32(WORD_ONES);
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(WORD_ONES);
        let b = _mm256_set1_epi32(WORD_ONES);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(WORD_ONES);
        let b = _mm256_set1_epi32(WORD_ONES);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(WORD_ONES);
        let b = _mm256_set1_epi32(WORD_ONES);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(WORD_ONES);
        let b = _mm_set1_epi32(WORD_ONES);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(WORD_ONES);
        let b = _mm_set1_epi32(WORD_ONES);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(WORD_ONES);
        let b = _mm_set1_epi32(WORD_ONES);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(WORD_ONES);
        let b = _mm512_set1_epi32(WORD_ONES);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);

        // Saturation: i32::MAX + 2 must clamp to i32::MAX rather than wrap.
        let src = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_dpwssds_epi32(src, a, b);
        assert_eq_m512i(r, src);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(WORD_ONES);
        let b = _mm512_set1_epi32(WORD_ONES);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(WORD_ONES);
        let b = _mm512_set1_epi32(WORD_ONES);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(WORD_ONES);
        let b = _mm256_set1_epi32(WORD_ONES);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);

        // Saturation: i32::MAX + 2 must clamp to i32::MAX rather than wrap.
        let src = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_dpwssds_epi32(src, a, b);
        assert_eq_m256i(r, src);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(WORD_ONES);
        let b = _mm256_set1_epi32(WORD_ONES);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(WORD_ONES);
        let b = _mm256_set1_epi32(WORD_ONES);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(WORD_ONES);
        let b = _mm_set1_epi32(WORD_ONES);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);

        // Saturation: i32::MAX + 2 must clamp to i32::MAX rather than wrap.
        let src = _mm_set1_epi32(i32::MAX);
        let r = _mm_dpwssds_epi32(src, a, b);
        assert_eq_m128i(r, src);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(WORD_ONES);
        let b = _mm_set1_epi32(WORD_ONES);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(WORD_ONES);
        let b = _mm_set1_epi32(WORD_ONES);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(BYTE_ONES);
        let b = _mm512_set1_epi32(BYTE_ONES);
        let r = _mm512_dpbusd_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(BYTE_ONES);
        let b = _mm512_set1_epi32(BYTE_ONES);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(BYTE_ONES);
        let b = _mm512_set1_epi32(BYTE_ONES);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(BYTE_ONES);
        let b = _mm256_set1_epi32(BYTE_ONES);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(BYTE_ONES);
        let b = _mm256_set1_epi32(BYTE_ONES);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(BYTE_ONES);
        let b = _mm256_set1_epi32(BYTE_ONES);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(BYTE_ONES);
        let b = _mm_set1_epi32(BYTE_ONES);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(BYTE_ONES);
        let b = _mm_set1_epi32(BYTE_ONES);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(BYTE_ONES);
        let b = _mm_set1_epi32(BYTE_ONES);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(BYTE_ONES);
        let b = _mm512_set1_epi32(BYTE_ONES);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);

        // Saturation: i32::MAX + 4 must clamp to i32::MAX rather than wrap.
        let src = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_dpbusds_epi32(src, a, b);
        assert_eq_m512i(r, src);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(BYTE_ONES);
        let b = _mm512_set1_epi32(BYTE_ONES);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(BYTE_ONES);
        let b = _mm512_set1_epi32(BYTE_ONES);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(BYTE_ONES);
        let b = _mm256_set1_epi32(BYTE_ONES);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);

        // Saturation: i32::MAX + 4 must clamp to i32::MAX rather than wrap.
        let src = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_dpbusds_epi32(src, a, b);
        assert_eq_m256i(r, src);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(BYTE_ONES);
        let b = _mm256_set1_epi32(BYTE_ONES);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(BYTE_ONES);
        let b = _mm256_set1_epi32(BYTE_ONES);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(BYTE_ONES);
        let b = _mm_set1_epi32(BYTE_ONES);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);

        // Saturation: i32::MAX + 4 must clamp to i32::MAX rather than wrap.
        let src = _mm_set1_epi32(i32::MAX);
        let r = _mm_dpbusds_epi32(src, a, b);
        assert_eq_m128i(r, src);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(BYTE_ONES);
        let b = _mm_set1_epi32(BYTE_ONES);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(BYTE_ONES);
        let b = _mm_set1_epi32(BYTE_ONES);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }
}
974