use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
}
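
// A scalar model of the per-lane arithmetic performed by `vpdpwssd`, as a
// hedged sketch (illustrative only, not part of this module's API): each
// 32-bit lane of `a` and `b` holds two signed 16-bit values; the two widened
// products are accumulated into the matching lane of `src` with wrapping
// 32-bit addition.
//
//     fn dpwssd_lane(src: i32, a: i32, b: i32) -> i32 {
//         let (a0, a1) = (a as i16 as i32, (a >> 16) as i16 as i32);
//         let (b0, b1) = (b as i16 as i32, (b >> 16) as i16 as i32);
//         src.wrapping_add(a0 * b0).wrapping_add(a1 * b1)
//     }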

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm512_mask_dpwssd_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
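
// The writemask/zeromask pattern used by every masked intrinsic below, as a
// scalar sketch (an illustrative model of `simd_select_bitmask`, not its
// actual implementation): bit j of `k` picks between the computed lane and a
// fallback lane, which is `src` for the `mask_` variants and zero for the
// `maskz_` variants.
//
//     fn select_lane(k: u16, j: u32, computed: i32, fallback: i32) -> i32 {
//         if (k >> j) & 1 == 1 { computed } else { fallback }
//     }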

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm512_maskz_dpwssd_epi32(
    k: __mmask16,
    src: __m512i,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm256_mask_dpwssd_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm256_maskz_dpwssd_epi32(
    k: __mmask8,
    src: __m256i,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
}
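
// A scalar model of the saturating form, as a hedged sketch (illustrative
// only): the same two 16-bit products per lane as `vpdpwssd`, but the full
// sum saturates to the signed 32-bit range instead of wrapping.
//
//     fn dpwssds_lane(src: i32, a: i32, b: i32) -> i32 {
//         let p = (a as i16 as i64) * (b as i16 as i64)
//             + ((a >> 16) as i16 as i64) * ((b >> 16) as i16 as i64);
//         (src as i64 + p).clamp(i32::MIN as i64, i32::MAX as i64) as i32
//     }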

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm512_mask_dpwssds_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm512_maskz_dpwssds_epi32(
    k: __mmask16,
    src: __m512i,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm256_mask_dpwssds_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm256_maskz_dpwssds_epi32(
    k: __mmask8,
    src: __m256i,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm_maskz_dpwssds_epi32(
    k: __mmask8,
    src: __m128i,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
}
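
// A scalar model of the per-lane byte dot product, as a hedged sketch
// (illustrative only): each 32-bit lane holds four bytes, taken as unsigned
// from `a` and signed from `b`; the four products are accumulated into the
// matching lane of `src` with wrapping 32-bit addition.
//
//     fn dpbusd_lane(src: i32, a: u32, b: u32) -> i32 {
//         let mut acc = src;
//         for i in 0..4 {
//             let au = ((a >> (8 * i)) & 0xff) as i32; // zero-extended
//             let bs = ((b >> (8 * i)) & 0xff) as u8 as i8 as i32; // sign-extended
//             acc = acc.wrapping_add(au * bs);
//         }
//         acc
//     }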

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm512_mask_dpbusd_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm512_maskz_dpbusd_epi32(
    k: __mmask16,
    src: __m512i,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm256_mask_dpbusd_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm256_maskz_dpbusd_epi32(
    k: __mmask8,
    src: __m256i,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
}
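
// A scalar model of the saturating byte form, as a hedged sketch
// (illustrative only): the same four unsigned-by-signed byte products per
// lane as `vpdpbusd`, with the full sum saturating to the signed 32-bit
// range instead of wrapping.
//
//     fn dpbusds_lane(src: i32, a: u32, b: u32) -> i32 {
//         let mut p = 0i64;
//         for i in 0..4 {
//             let au = ((a >> (8 * i)) & 0xff) as i64;
//             let bs = ((b >> (8 * i)) & 0xff) as u8 as i8 as i64;
//             p += au * bs;
//         }
//         (src as i64 + p).clamp(i32::MIN as i64, i32::MAX as i64) as i32
//     }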

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm512_mask_dpbusds_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
#[inline]
#[target_feature(enable = "avx512vnni")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm512_maskz_dpbusds_epi32(
    k: __mmask16,
    src: __m512i,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm256_mask_dpbusds_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm256_maskz_dpbusds_epi32(
    k: __mmask8,
    src: __m256i,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
#[inline]
#[target_feature(enable = "avx512vnni,avx512vl")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm_maskz_dpbusds_epi32(
    k: __mmask8,
    src: __m128i,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, r, zero))
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
}

#[cfg(test)]
mod tests {

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssd_epi32(src, a, b);
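        // Each i32 lane accumulates 1 + (1 * 1) + (1 * 1) = 3.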
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusd_epi32(src, a, b);
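        // Each i32 lane accumulates 1 + four 1 * 1 byte products = 5.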
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }
}