avx512vnni.rs source code [crates/core_arch/src/x86/avx512vnni.rs]

1	use crate::core_arch::{simd::, x86::};
2	use crate::intrinsics::simd::*;
3
4	#[cfg(test)]
5	use stdarch_test::assert_instr;
6
7	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
8	///
9	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
10	#[inline]
11	#[target_feature(enable = "avx512vnni")]
12	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13	#[cfg_attr(test, assert_instr(vpdpwssd))]
14	pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
15	transmute(src:vpdpwssd(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
16	}
17
18	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19	///
20	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
21	#[inline]
22	#[target_feature(enable = "avx512vnni")]
23	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24	#[cfg_attr(test, assert_instr(vpdpwssd))]
25	pub unsafe fn _mm512_mask_dpwssd_epi32(
26	src: __m512i,
27	k: __mmask16,
28	a: __m512i,
29	b: __m512i,
30	) -> __m512i {
31	let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
32	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
33	}
34
35	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36	///
37	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
38	#[inline]
39	#[target_feature(enable = "avx512vnni")]
40	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41	#[cfg_attr(test, assert_instr(vpdpwssd))]
42	pub unsafe fn _mm512_maskz_dpwssd_epi32(
43	k: __mmask16,
44	src: __m512i,
45	a: __m512i,
46	b: __m512i,
47	) -> __m512i {
48	let r: i32x16 = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
49	let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
50	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
51	}
52
53	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
54	///
55	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
56	#[inline]
57	#[target_feature(enable = "avx512vnni,avx512vl")]
58	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
59	#[cfg_attr(test, assert_instr(vpdpwssd))]
60	pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
61	transmute(src:vpdpwssd256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
62	}
63
64	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
65	///
66	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
67	#[inline]
68	#[target_feature(enable = "avx512vnni,avx512vl")]
69	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
70	#[cfg_attr(test, assert_instr(vpdpwssd))]
71	pub unsafe fn _mm256_mask_dpwssd_epi32(
72	src: __m256i,
73	k: __mmask8,
74	a: __m256i,
75	b: __m256i,
76	) -> __m256i {
77	let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
78	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
79	}
80
81	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
82	///
83	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
84	#[inline]
85	#[target_feature(enable = "avx512vnni,avx512vl")]
86	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
87	#[cfg_attr(test, assert_instr(vpdpwssd))]
88	pub unsafe fn _mm256_maskz_dpwssd_epi32(
89	k: __mmask8,
90	src: __m256i,
91	a: __m256i,
92	b: __m256i,
93	) -> __m256i {
94	let r: i32x8 = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
95	let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
96	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
97	}
98
99	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
100	///
101	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
102	#[inline]
103	#[target_feature(enable = "avx512vnni,avx512vl")]
104	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
105	#[cfg_attr(test, assert_instr(vpdpwssd))]
106	pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
107	transmute(src:vpdpwssd128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
108	}
109
110	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
111	///
112	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
113	#[inline]
114	#[target_feature(enable = "avx512vnni,avx512vl")]
115	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
116	#[cfg_attr(test, assert_instr(vpdpwssd))]
117	pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
118	let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
119	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
120	}
121
122	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
123	///
124	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
125	#[inline]
126	#[target_feature(enable = "avx512vnni,avx512vl")]
127	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
128	#[cfg_attr(test, assert_instr(vpdpwssd))]
129	pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
130	let r: i32x4 = _mm_dpwssd_epi32(src, a, b).as_i32x4();
131	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
132	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
133	}
134
135	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
136	///
137	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
138	#[inline]
139	#[target_feature(enable = "avx512vnni")]
140	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
141	#[cfg_attr(test, assert_instr(vpdpwssds))]
142	pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
143	transmute(src:vpdpwssds(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
144	}
145
146	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
147	///
148	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
149	#[inline]
150	#[target_feature(enable = "avx512vnni")]
151	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
152	#[cfg_attr(test, assert_instr(vpdpwssds))]
153	pub unsafe fn _mm512_mask_dpwssds_epi32(
154	src: __m512i,
155	k: __mmask16,
156	a: __m512i,
157	b: __m512i,
158	) -> __m512i {
159	let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
160	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
161	}
162
163	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
164	///
165	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
166	#[inline]
167	#[target_feature(enable = "avx512vnni")]
168	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
169	#[cfg_attr(test, assert_instr(vpdpwssds))]
170	pub unsafe fn _mm512_maskz_dpwssds_epi32(
171	k: __mmask16,
172	src: __m512i,
173	a: __m512i,
174	b: __m512i,
175	) -> __m512i {
176	let r: i32x16 = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
177	let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
178	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
179	}
180
181	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
182	///
183	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
184	#[inline]
185	#[target_feature(enable = "avx512vnni,avx512vl")]
186	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
187	#[cfg_attr(test, assert_instr(vpdpwssds))]
188	pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
189	transmute(src:vpdpwssds256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
190	}
191
192	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
193	///
194	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
195	#[inline]
196	#[target_feature(enable = "avx512vnni,avx512vl")]
197	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198	#[cfg_attr(test, assert_instr(vpdpwssds))]
199	pub unsafe fn _mm256_mask_dpwssds_epi32(
200	src: __m256i,
201	k: __mmask8,
202	a: __m256i,
203	b: __m256i,
204	) -> __m256i {
205	let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
206	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
207	}
208
209	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
210	///
211	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
212	#[inline]
213	#[target_feature(enable = "avx512vnni,avx512vl")]
214	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
215	#[cfg_attr(test, assert_instr(vpdpwssds))]
216	pub unsafe fn _mm256_maskz_dpwssds_epi32(
217	k: __mmask8,
218	src: __m256i,
219	a: __m256i,
220	b: __m256i,
221	) -> __m256i {
222	let r: i32x8 = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
223	let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
224	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
225	}
226
227	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
228	///
229	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
230	#[inline]
231	#[target_feature(enable = "avx512vnni,avx512vl")]
232	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
233	#[cfg_attr(test, assert_instr(vpdpwssds))]
234	pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
235	transmute(src:vpdpwssds128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
236	}
237
238	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
239	///
240	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
241	#[inline]
242	#[target_feature(enable = "avx512vnni,avx512vl")]
243	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
244	#[cfg_attr(test, assert_instr(vpdpwssds))]
245	pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
246	let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
247	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
248	}
249
250	/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
251	///
252	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
253	#[inline]
254	#[target_feature(enable = "avx512vnni,avx512vl")]
255	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
256	#[cfg_attr(test, assert_instr(vpdpwssds))]
257	pub unsafe fn _mm_maskz_dpwssds_epi32(
258	k: __mmask8,
259	src: __m128i,
260	a: __m128i,
261	b: __m128i,
262	) -> __m128i {
263	let r: i32x4 = _mm_dpwssds_epi32(src, a, b).as_i32x4();
264	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
265	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
266	}
267
268	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
269	///
270	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
271	#[inline]
272	#[target_feature(enable = "avx512vnni")]
273	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
274	#[cfg_attr(test, assert_instr(vpdpbusd))]
275	pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
276	transmute(src:vpdpbusd(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
277	}
278
279	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
280	///
281	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
282	#[inline]
283	#[target_feature(enable = "avx512vnni")]
284	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
285	#[cfg_attr(test, assert_instr(vpdpbusd))]
286	pub unsafe fn _mm512_mask_dpbusd_epi32(
287	src: __m512i,
288	k: __mmask16,
289	a: __m512i,
290	b: __m512i,
291	) -> __m512i {
292	let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
293	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
294	}
295
296	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
297	///
298	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
299	#[inline]
300	#[target_feature(enable = "avx512vnni")]
301	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
302	#[cfg_attr(test, assert_instr(vpdpbusd))]
303	pub unsafe fn _mm512_maskz_dpbusd_epi32(
304	k: __mmask16,
305	src: __m512i,
306	a: __m512i,
307	b: __m512i,
308	) -> __m512i {
309	let r: i32x16 = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
310	let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
311	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
312	}
313
314	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
315	///
316	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
317	#[inline]
318	#[target_feature(enable = "avx512vnni,avx512vl")]
319	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
320	#[cfg_attr(test, assert_instr(vpdpbusd))]
321	pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
322	transmute(src:vpdpbusd256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
323	}
324
325	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
326	///
327	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
328	#[inline]
329	#[target_feature(enable = "avx512vnni,avx512vl")]
330	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
331	#[cfg_attr(test, assert_instr(vpdpbusd))]
332	pub unsafe fn _mm256_mask_dpbusd_epi32(
333	src: __m256i,
334	k: __mmask8,
335	a: __m256i,
336	b: __m256i,
337	) -> __m256i {
338	let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
339	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
340	}
341
342	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
343	///
344	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
345	#[inline]
346	#[target_feature(enable = "avx512vnni,avx512vl")]
347	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
348	#[cfg_attr(test, assert_instr(vpdpbusd))]
349	pub unsafe fn _mm256_maskz_dpbusd_epi32(
350	k: __mmask8,
351	src: __m256i,
352	a: __m256i,
353	b: __m256i,
354	) -> __m256i {
355	let r: i32x8 = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
356	let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
357	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
358	}
359
360	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
361	///
362	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
363	#[inline]
364	#[target_feature(enable = "avx512vnni,avx512vl")]
365	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
366	#[cfg_attr(test, assert_instr(vpdpbusd))]
367	pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
368	transmute(src:vpdpbusd128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
369	}
370
371	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
372	///
373	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
374	#[inline]
375	#[target_feature(enable = "avx512vnni,avx512vl")]
376	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
377	#[cfg_attr(test, assert_instr(vpdpbusd))]
378	pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
379	let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
380	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
381	}
382
383	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
384	///
385	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
386	#[inline]
387	#[target_feature(enable = "avx512vnni,avx512vl")]
388	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
389	#[cfg_attr(test, assert_instr(vpdpbusd))]
390	pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
391	let r: i32x4 = _mm_dpbusd_epi32(src, a, b).as_i32x4();
392	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
393	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
394	}
395
396	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
397	///
398	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
399	#[inline]
400	#[target_feature(enable = "avx512vnni")]
401	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
402	#[cfg_attr(test, assert_instr(vpdpbusds))]
403	pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
404	transmute(src:vpdpbusds(src:src.as_i32x16(), a:a.as_i32x16(), b:b.as_i32x16()))
405	}
406
407	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
408	///
409	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
410	#[inline]
411	#[target_feature(enable = "avx512vnni")]
412	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
413	#[cfg_attr(test, assert_instr(vpdpbusds))]
414	pub unsafe fn _mm512_mask_dpbusds_epi32(
415	src: __m512i,
416	k: __mmask16,
417	a: __m512i,
418	b: __m512i,
419	) -> __m512i {
420	let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
421	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
422	}
423
424	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
425	///
426	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
427	#[inline]
428	#[target_feature(enable = "avx512vnni")]
429	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
430	#[cfg_attr(test, assert_instr(vpdpbusds))]
431	pub unsafe fn _mm512_maskz_dpbusds_epi32(
432	k: __mmask16,
433	src: __m512i,
434	a: __m512i,
435	b: __m512i,
436	) -> __m512i {
437	let r: i32x16 = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
438	let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
439	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
440	}
441
442	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
443	///
444	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
445	#[inline]
446	#[target_feature(enable = "avx512vnni,avx512vl")]
447	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
448	#[cfg_attr(test, assert_instr(vpdpbusds))]
449	pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
450	transmute(src:vpdpbusds256(src:src.as_i32x8(), a:a.as_i32x8(), b:b.as_i32x8()))
451	}
452
453	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
454	///
455	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
456	#[inline]
457	#[target_feature(enable = "avx512vnni,avx512vl")]
458	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
459	#[cfg_attr(test, assert_instr(vpdpbusds))]
460	pub unsafe fn _mm256_mask_dpbusds_epi32(
461	src: __m256i,
462	k: __mmask8,
463	a: __m256i,
464	b: __m256i,
465	) -> __m256i {
466	let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
467	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
468	}
469
470	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
471	///
472	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
473	#[inline]
474	#[target_feature(enable = "avx512vnni,avx512vl")]
475	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
476	#[cfg_attr(test, assert_instr(vpdpbusds))]
477	pub unsafe fn _mm256_maskz_dpbusds_epi32(
478	k: __mmask8,
479	src: __m256i,
480	a: __m256i,
481	b: __m256i,
482	) -> __m256i {
483	let r: i32x8 = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
484	let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
485	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
486	}
487
488	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
489	///
490	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
491	#[inline]
492	#[target_feature(enable = "avx512vnni,avx512vl")]
493	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
494	#[cfg_attr(test, assert_instr(vpdpbusds))]
495	pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
496	transmute(src:vpdpbusds128(src:src.as_i32x4(), a:a.as_i32x4(), b:b.as_i32x4()))
497	}
498
499	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
500	///
501	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
502	#[inline]
503	#[target_feature(enable = "avx512vnni,avx512vl")]
504	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
505	#[cfg_attr(test, assert_instr(vpdpbusds))]
506	pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
507	let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
508	transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
509	}
510
511	/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
512	///
513	/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
514	#[inline]
515	#[target_feature(enable = "avx512vnni,avx512vl")]
516	#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
517	#[cfg_attr(test, assert_instr(vpdpbusds))]
518	pub unsafe fn _mm_maskz_dpbusds_epi32(
519	k: __mmask8,
520	src: __m128i,
521	a: __m128i,
522	b: __m128i,
523	) -> __m128i {
524	let r: i32x4 = _mm_dpbusds_epi32(src, a, b).as_i32x4();
525	let zero: i32x4 = _mm_setzero_si128().as_i32x4();
526	transmute(src:simd_select_bitmask(m:k, yes:r, no:zero))
527	}
528
529	#[allow(improper_ctypes)]
530	extern "C" {
531	#[link_name = "llvm.x86.avx512.vpdpwssd.512"]
532	fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
533	#[link_name = "llvm.x86.avx512.vpdpwssd.256"]
534	fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
535	#[link_name = "llvm.x86.avx512.vpdpwssd.128"]
536	fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
537
538	#[link_name = "llvm.x86.avx512.vpdpwssds.512"]
539	fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
540	#[link_name = "llvm.x86.avx512.vpdpwssds.256"]
541	fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
542	#[link_name = "llvm.x86.avx512.vpdpwssds.128"]
543	fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
544
545	#[link_name = "llvm.x86.avx512.vpdpbusd.512"]
546	fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
547	#[link_name = "llvm.x86.avx512.vpdpbusd.256"]
548	fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
549	#[link_name = "llvm.x86.avx512.vpdpbusd.128"]
550	fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
551
552	#[link_name = "llvm.x86.avx512.vpdpbusds.512"]
553	fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
554	#[link_name = "llvm.x86.avx512.vpdpbusds.256"]
555	fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
556	#[link_name = "llvm.x86.avx512.vpdpbusds.128"]
557	fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
558	}
559
// Tests for the AVX512-VNNI dot-product intrinsics.
//
// Every test uses an accumulator of 1 and multiplicands whose packed
// sub-elements are all 1. The 16-bit variants (`dpwssd`/`dpwssds`) therefore
// expect 1 + 2 = 3 per lane, and the 8-bit variants (`dpbusd`/`dpbusds`)
// expect 1 + 4 = 5 per lane. Masked tests run once with an all-clear mask
// (result is `src` or zero) and once with every lane selected.
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssd_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssd_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssd_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_dpwssds_epi32(src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_mask_dpwssds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpwssds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm512_maskz_dpwssds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpwssds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_dpwssds_epi32(src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpwssds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm256_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpwssds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_dpwssds_epi32(src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_mask_dpwssds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpwssds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 16 | 1 << 0);
        let b = _mm_set1_epi32(1 << 16 | 1 << 0);
        let r = _mm_maskz_dpwssds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpwssds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusd_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusd_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusd_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusd_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusd_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusd_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusd_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusd_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusd_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusd_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusd_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusd_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusd_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_dpbusds_epi32(src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_mask_dpbusds_epi32(src, 0b00000000_00000000, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dpbusds_epi32(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm512_maskz_dpbusds_epi32(0b00000000_00000000, src, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dpbusds_epi32(0b11111111_11111111, src, a, b);
        let e = _mm512_set1_epi32(5);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_dpbusds_epi32(src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dpbusds_epi32(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let src = _mm256_set1_epi32(1);
        let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm256_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dpbusds_epi32(0b11111111, src, a, b);
        let e = _mm256_set1_epi32(5);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_dpbusds_epi32(src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_mask_dpbusds_epi32(src, 0b00000000, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dpbusds_epi32(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let src = _mm_set1_epi32(1);
        let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
        let r = _mm_maskz_dpbusds_epi32(0b00000000, src, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dpbusds_epi32(0b00001111, src, a, b);
        let e = _mm_set1_epi32(5);
        assert_eq_m128i(r, e);
    }
}
974