1use crate::{
2 core_arch::{simd::*, simd_llvm::*, x86::*},
3 mem::transmute,
4};
5
6#[cfg(test)]
7use stdarch_test::assert_instr;
8
9/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
10///
11/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
12#[inline]
13#[target_feature(enable = "avx512vnni")]
14#[cfg_attr(test, assert_instr(vpdpwssd))]
15pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
16 transmute(src:vpdpwssd(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
17}
18
19/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20///
21/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
22#[inline]
23#[target_feature(enable = "avx512vnni")]
24#[cfg_attr(test, assert_instr(vpdpwssd))]
25pub unsafe fn _mm512_mask_dpwssd_epi32(
26 src: __m512i,
27 k: __mmask16,
28 a: __m512i,
29 b: __m512i,
30) -> __m512i {
31 let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
32 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x16()))
33}
34
35/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36///
37/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
38#[inline]
39#[target_feature(enable = "avx512vnni")]
40#[cfg_attr(test, assert_instr(vpdpwssd))]
41pub unsafe fn _mm512_maskz_dpwssd_epi32(
42 k: __mmask16,
43 src: __m512i,
44 a: __m512i,
45 b: __m512i,
46) -> __m512i {
47 let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
48 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
49 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
50}
51
52/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
53///
54/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
55#[inline]
56#[target_feature(enable = "avx512vnni,avx512vl")]
57#[cfg_attr(test, assert_instr(vpdpwssd))]
58pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
59 transmute(src:vpdpwssd256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
60}
61
62/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
63///
64/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
65#[inline]
66#[target_feature(enable = "avx512vnni,avx512vl")]
67#[cfg_attr(test, assert_instr(vpdpwssd))]
68pub unsafe fn _mm256_mask_dpwssd_epi32(
69 src: __m256i,
70 k: __mmask8,
71 a: __m256i,
72 b: __m256i,
73) -> __m256i {
74 let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
75 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x8()))
76}
77
78/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
79///
80/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
81#[inline]
82#[target_feature(enable = "avx512vnni,avx512vl")]
83#[cfg_attr(test, assert_instr(vpdpwssd))]
84pub unsafe fn _mm256_maskz_dpwssd_epi32(
85 k: __mmask8,
86 src: __m256i,
87 a: __m256i,
88 b: __m256i,
89) -> __m256i {
90 let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
91 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
92 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
93}
94
95/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
96///
97/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
98#[inline]
99#[target_feature(enable = "avx512vnni,avx512vl")]
100#[cfg_attr(test, assert_instr(vpdpwssd))]
101pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
102 transmute(src:vpdpwssd128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
103}
104
105/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
108#[inline]
109#[target_feature(enable = "avx512vnni,avx512vl")]
110#[cfg_attr(test, assert_instr(vpdpwssd))]
111pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
112 let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
113 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x4()))
114}
115
116/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
117///
118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
119#[inline]
120#[target_feature(enable = "avx512vnni,avx512vl")]
121#[cfg_attr(test, assert_instr(vpdpwssd))]
122pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
123 let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
124 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
125 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
126}
127
128/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
129///
130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
131#[inline]
132#[target_feature(enable = "avx512vnni")]
133#[cfg_attr(test, assert_instr(vpdpwssds))]
134pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
135 transmute(src:vpdpwssds(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
136}
137
138/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
139///
140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
141#[inline]
142#[target_feature(enable = "avx512vnni")]
143#[cfg_attr(test, assert_instr(vpdpwssds))]
144pub unsafe fn _mm512_mask_dpwssds_epi32(
145 src: __m512i,
146 k: __mmask16,
147 a: __m512i,
148 b: __m512i,
149) -> __m512i {
150 let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
151 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x16()))
152}
153
154/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
155///
156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
157#[inline]
158#[target_feature(enable = "avx512vnni")]
159#[cfg_attr(test, assert_instr(vpdpwssds))]
160pub unsafe fn _mm512_maskz_dpwssds_epi32(
161 k: __mmask16,
162 src: __m512i,
163 a: __m512i,
164 b: __m512i,
165) -> __m512i {
166 let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
167 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
168 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
169}
170
171/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
172///
173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
174#[inline]
175#[target_feature(enable = "avx512vnni,avx512vl")]
176#[cfg_attr(test, assert_instr(vpdpwssds))]
177pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
178 transmute(src:vpdpwssds256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
179}
180
181/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
182///
183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
184#[inline]
185#[target_feature(enable = "avx512vnni,avx512vl")]
186#[cfg_attr(test, assert_instr(vpdpwssds))]
187pub unsafe fn _mm256_mask_dpwssds_epi32(
188 src: __m256i,
189 k: __mmask8,
190 a: __m256i,
191 b: __m256i,
192) -> __m256i {
193 let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
194 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x8()))
195}
196
197/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
198///
199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
200#[inline]
201#[target_feature(enable = "avx512vnni,avx512vl")]
202#[cfg_attr(test, assert_instr(vpdpwssds))]
203pub unsafe fn _mm256_maskz_dpwssds_epi32(
204 k: __mmask8,
205 src: __m256i,
206 a: __m256i,
207 b: __m256i,
208) -> __m256i {
209 let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
210 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
211 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
212}
213
214/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
215///
216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
217#[inline]
218#[target_feature(enable = "avx512vnni,avx512vl")]
219#[cfg_attr(test, assert_instr(vpdpwssds))]
220pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
221 transmute(src:vpdpwssds128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
222}
223
224/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
225///
226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
227#[inline]
228#[target_feature(enable = "avx512vnni,avx512vl")]
229#[cfg_attr(test, assert_instr(vpdpwssds))]
230pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
231 let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
232 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x4()))
233}
234
235/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
236///
237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
238#[inline]
239#[target_feature(enable = "avx512vnni,avx512vl")]
240#[cfg_attr(test, assert_instr(vpdpwssds))]
241pub unsafe fn _mm_maskz_dpwssds_epi32(
242 k: __mmask8,
243 src: __m128i,
244 a: __m128i,
245 b: __m128i,
246) -> __m128i {
247 let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
248 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
249 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
250}
251
252/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
253///
254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
255#[inline]
256#[target_feature(enable = "avx512vnni")]
257#[cfg_attr(test, assert_instr(vpdpbusd))]
258pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
259 transmute(src:vpdpbusd(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
260}
261
262/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
263///
264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
265#[inline]
266#[target_feature(enable = "avx512vnni")]
267#[cfg_attr(test, assert_instr(vpdpbusd))]
268pub unsafe fn _mm512_mask_dpbusd_epi32(
269 src: __m512i,
270 k: __mmask16,
271 a: __m512i,
272 b: __m512i,
273) -> __m512i {
274 let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
275 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x16()))
276}
277
278/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
279///
280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
281#[inline]
282#[target_feature(enable = "avx512vnni")]
283#[cfg_attr(test, assert_instr(vpdpbusd))]
284pub unsafe fn _mm512_maskz_dpbusd_epi32(
285 k: __mmask16,
286 src: __m512i,
287 a: __m512i,
288 b: __m512i,
289) -> __m512i {
290 let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
291 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
292 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
293}
294
295/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
296///
297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
298#[inline]
299#[target_feature(enable = "avx512vnni,avx512vl")]
300#[cfg_attr(test, assert_instr(vpdpbusd))]
301pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
302 transmute(src:vpdpbusd256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
303}
304
305/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
306///
307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
308#[inline]
309#[target_feature(enable = "avx512vnni,avx512vl")]
310#[cfg_attr(test, assert_instr(vpdpbusd))]
311pub unsafe fn _mm256_mask_dpbusd_epi32(
312 src: __m256i,
313 k: __mmask8,
314 a: __m256i,
315 b: __m256i,
316) -> __m256i {
317 let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
318 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x8()))
319}
320
321/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
322///
323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
324#[inline]
325#[target_feature(enable = "avx512vnni,avx512vl")]
326#[cfg_attr(test, assert_instr(vpdpbusd))]
327pub unsafe fn _mm256_maskz_dpbusd_epi32(
328 k: __mmask8,
329 src: __m256i,
330 a: __m256i,
331 b: __m256i,
332) -> __m256i {
333 let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
334 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
335 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
336}
337
338/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
339///
340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
341#[inline]
342#[target_feature(enable = "avx512vnni,avx512vl")]
343#[cfg_attr(test, assert_instr(vpdpbusd))]
344pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
345 transmute(src:vpdpbusd128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
346}
347
348/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
349///
350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
351#[inline]
352#[target_feature(enable = "avx512vnni,avx512vl")]
353#[cfg_attr(test, assert_instr(vpdpbusd))]
354pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
355 let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
356 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x4()))
357}
358
359/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
360///
361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
362#[inline]
363#[target_feature(enable = "avx512vnni,avx512vl")]
364#[cfg_attr(test, assert_instr(vpdpbusd))]
365pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
366 let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
367 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
368 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
369}
370
371/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
372///
373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
374#[inline]
375#[target_feature(enable = "avx512vnni")]
376#[cfg_attr(test, assert_instr(vpdpbusds))]
377pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
378 transmute(src:vpdpbusds(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
379}
380
381/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
384#[inline]
385#[target_feature(enable = "avx512vnni")]
386#[cfg_attr(test, assert_instr(vpdpbusds))]
387pub unsafe fn _mm512_mask_dpbusds_epi32(
388 src: __m512i,
389 k: __mmask16,
390 a: __m512i,
391 b: __m512i,
392) -> __m512i {
393 let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
394 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x16()))
395}
396
397/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
398///
399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
400#[inline]
401#[target_feature(enable = "avx512vnni")]
402#[cfg_attr(test, assert_instr(vpdpbusds))]
403pub unsafe fn _mm512_maskz_dpbusds_epi32(
404 k: __mmask16,
405 src: __m512i,
406 a: __m512i,
407 b: __m512i,
408) -> __m512i {
409 let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
410 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
411 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
412}
413
414/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
415///
416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
417#[inline]
418#[target_feature(enable = "avx512vnni,avx512vl")]
419#[cfg_attr(test, assert_instr(vpdpbusds))]
420pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
421 transmute(src:vpdpbusds256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
422}
423
424/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
425///
426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
427#[inline]
428#[target_feature(enable = "avx512vnni,avx512vl")]
429#[cfg_attr(test, assert_instr(vpdpbusds))]
430pub unsafe fn _mm256_mask_dpbusds_epi32(
431 src: __m256i,
432 k: __mmask8,
433 a: __m256i,
434 b: __m256i,
435) -> __m256i {
436 let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
437 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x8()))
438}
439
440/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
441///
442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
443#[inline]
444#[target_feature(enable = "avx512vnni,avx512vl")]
445#[cfg_attr(test, assert_instr(vpdpbusds))]
446pub unsafe fn _mm256_maskz_dpbusds_epi32(
447 k: __mmask8,
448 src: __m256i,
449 a: __m256i,
450 b: __m256i,
451) -> __m256i {
452 let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
453 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
454 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
455}
456
457/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
458///
459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
460#[inline]
461#[target_feature(enable = "avx512vnni,avx512vl")]
462#[cfg_attr(test, assert_instr(vpdpbusds))]
463pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
464 transmute(src:vpdpbusds128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
465}
466
467/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
468///
469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
470#[inline]
471#[target_feature(enable = "avx512vnni,avx512vl")]
472#[cfg_attr(test, assert_instr(vpdpbusds))]
473pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
474 let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
475 transmute(src:simd_select_bitmask(m:k, a:r, b:src.as_i32x4()))
476}
477
478/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
479///
480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
481#[inline]
482#[target_feature(enable = "avx512vnni,avx512vl")]
483#[cfg_attr(test, assert_instr(vpdpbusds))]
484pub unsafe fn _mm_maskz_dpbusds_epi32(
485 k: __mmask8,
486 src: __m128i,
487 a: __m128i,
488 b: __m128i,
489) -> __m128i {
490 let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
491 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
492 transmute(src:simd_select_bitmask(m:k, a:r, b:zero))
493}
494
// Raw LLVM intrinsic declarations backing the VNNI wrappers above. Each takes
// the accumulator `src` plus the two multiplicand vectors and returns the
// accumulated result for the matching vector width (.512 / .256 / .128).
#[allow(improper_ctypes)]
extern "C" {
    // vpdpwssd: signed 16-bit pair dot-product, wrapping accumulate.
    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // vpdpwssds: signed 16-bit pair dot-product, saturating accumulate.
    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // vpdpbusd: unsigned-by-signed 8-bit quad dot-product, wrapping accumulate.
    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    // vpdpbusds: unsigned-by-signed 8-bit quad dot-product, saturating accumulate.
    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
}
525
#[cfg(test)]
mod tests {
    // Every test follows the same recipe: fill src with 1, fill a and b with
    // vectors whose sub-elements are all 1, and check the accumulated result.
    //
    // Word (dpwssd*) tests use `1 << 16 | 1 << 0` per 32-bit lane, i.e. two
    // 16-bit ones; the lane result is 1*1 + 1*1 + src = 3.
    // Byte (dpbusd*) tests use `1 << 24 | 1 << 16 | 1 << 8 | 1 << 0`, i.e.
    // four 8-bit ones; the lane result is 4*(1*1) + src = 5.
    //
    // mask_ variants are checked with an all-zero mask (result == src) and an
    // all-ones mask (full computation); maskz_ variants with an all-zero mask
    // (result == 0) and an all-ones mask. Mask widths match the lane count:
    // 16 bits for 512-bit vectors, 8 for 256-bit, low 4 bits for 128-bit.

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // ---- VPDPWSSD (signed word pairs, wrapping accumulate) ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssd_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    // ---- VPDPWSSDS (signed word pairs, saturating accumulate) ----
    // Inputs here are far from i32 overflow, so saturation never triggers and
    // the expected values match the non-saturating variants.

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    // ---- VPDPBUSD (unsigned x signed byte quads, wrapping accumulate) ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusd_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    // ---- VPDPBUSDS (unsigned x signed byte quads, saturating accumulate) ----
    // As with dpwssds, these small inputs never saturate.

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }
}
940