1 | use crate::arch::asm; |
2 | use crate::core_arch::{simd::*, x86::*}; |
3 | use crate::intrinsics::{fmaf16, simd::*}; |
4 | use crate::ptr; |
5 | |
6 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. |
7 | /// |
8 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph) |
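///
/// An illustrative sketch of the argument order (assumes a nightly toolchain with the `f16`
/// type and a caller that enables `avx512fp16`):
///
/// ```ignore
/// // Arguments run from the highest lane (e7) down to the lowest (e0),
/// // so 0.5 ends up in lane 0 and 7.5 in lane 7.
/// let v = _mm_set_ph(7.5, 6.5, 5.5, 4.5, 3.5, 2.5, 1.5, 0.5);
/// ```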
9 | #[inline ] |
10 | #[target_feature (enable = "avx512fp16" )] |
11 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12 | pub fn _mm_set_ph( |
13 | e7: f16, |
14 | e6: f16, |
15 | e5: f16, |
16 | e4: f16, |
17 | e3: f16, |
18 | e2: f16, |
19 | e1: f16, |
20 | e0: f16, |
21 | ) -> __m128h { |
22 | __m128h([e0, e1, e2, e3, e4, e5, e6, e7]) |
23 | } |
24 | |
25 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. |
26 | /// |
27 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph) |
28 | #[inline ] |
29 | #[target_feature (enable = "avx512fp16" )] |
30 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
31 | pub fn _mm256_set_ph( |
32 | e15: f16, |
33 | e14: f16, |
34 | e13: f16, |
35 | e12: f16, |
36 | e11: f16, |
37 | e10: f16, |
38 | e9: f16, |
39 | e8: f16, |
40 | e7: f16, |
41 | e6: f16, |
42 | e5: f16, |
43 | e4: f16, |
44 | e3: f16, |
45 | e2: f16, |
46 | e1: f16, |
47 | e0: f16, |
48 | ) -> __m256h { |
49 | __m256h([ |
50 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, |
51 | ]) |
52 | } |
53 | |
54 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values. |
55 | /// |
56 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph) |
57 | #[inline ] |
58 | #[target_feature (enable = "avx512fp16" )] |
59 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
60 | pub fn _mm512_set_ph( |
61 | e31: f16, |
62 | e30: f16, |
63 | e29: f16, |
64 | e28: f16, |
65 | e27: f16, |
66 | e26: f16, |
67 | e25: f16, |
68 | e24: f16, |
69 | e23: f16, |
70 | e22: f16, |
71 | e21: f16, |
72 | e20: f16, |
73 | e19: f16, |
74 | e18: f16, |
75 | e17: f16, |
76 | e16: f16, |
77 | e15: f16, |
78 | e14: f16, |
79 | e13: f16, |
80 | e12: f16, |
81 | e11: f16, |
82 | e10: f16, |
83 | e9: f16, |
84 | e8: f16, |
85 | e7: f16, |
86 | e6: f16, |
87 | e5: f16, |
88 | e4: f16, |
89 | e3: f16, |
90 | e2: f16, |
91 | e1: f16, |
92 | e0: f16, |
93 | ) -> __m512h { |
94 | __m512h([ |
95 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, |
96 | e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, |
97 | ]) |
98 | } |
99 | |
/// Copy the half-precision (16-bit) floating-point element from a to the lower element of dst, and zero
/// the upper 7 elements.
102 | /// |
103 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh) |
104 | #[inline ] |
105 | #[target_feature (enable = "avx512fp16" )] |
106 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
107 | pub fn _mm_set_sh(a: f16) -> __m128h { |
108 | __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) |
109 | } |
110 | |
111 | /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. |
112 | /// |
113 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph) |
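///
/// A small broadcast sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// // Every one of the 8 lanes holds the same value.
/// let ones = _mm_set1_ph(1.0);
/// ```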
114 | #[inline ] |
115 | #[target_feature (enable = "avx512fp16" )] |
116 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
117 | pub fn _mm_set1_ph(a: f16) -> __m128h { |
    unsafe { transmute(f16x8::splat(a)) }
119 | } |
120 | |
121 | /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. |
122 | /// |
123 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph) |
124 | #[inline ] |
125 | #[target_feature (enable = "avx512fp16" )] |
126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
127 | pub fn _mm256_set1_ph(a: f16) -> __m256h { |
    unsafe { transmute(f16x16::splat(a)) }
129 | } |
130 | |
131 | /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. |
132 | /// |
133 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph) |
134 | #[inline ] |
135 | #[target_feature (enable = "avx512fp16" )] |
136 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
137 | pub fn _mm512_set1_ph(a: f16) -> __m512h { |
    unsafe { transmute(f16x32::splat(a)) }
139 | } |
140 | |
141 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. |
142 | /// |
143 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph) |
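///
/// A minimal sketch of the lane order (assumes a nightly toolchain with the `f16` type and a
/// caller that enables `avx512fp16`):
///
/// ```ignore
/// // `_mm_setr_ph` takes its arguments in lane order (e0 first), so these two
/// // calls are expected to build the same vector.
/// let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// ```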
144 | #[inline ] |
145 | #[target_feature (enable = "avx512fp16" )] |
146 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
147 | pub fn _mm_setr_ph( |
148 | e0: f16, |
149 | e1: f16, |
150 | e2: f16, |
151 | e3: f16, |
152 | e4: f16, |
153 | e5: f16, |
154 | e6: f16, |
155 | e7: f16, |
156 | ) -> __m128h { |
157 | __m128h([e0, e1, e2, e3, e4, e5, e6, e7]) |
158 | } |
159 | |
160 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. |
161 | /// |
162 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph) |
163 | #[inline ] |
164 | #[target_feature (enable = "avx512fp16" )] |
165 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
166 | pub fn _mm256_setr_ph( |
167 | e0: f16, |
168 | e1: f16, |
169 | e2: f16, |
170 | e3: f16, |
171 | e4: f16, |
172 | e5: f16, |
173 | e6: f16, |
174 | e7: f16, |
175 | e8: f16, |
176 | e9: f16, |
177 | e10: f16, |
178 | e11: f16, |
179 | e12: f16, |
180 | e13: f16, |
181 | e14: f16, |
182 | e15: f16, |
183 | ) -> __m256h { |
184 | __m256h([ |
185 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, |
186 | ]) |
187 | } |
188 | |
189 | /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. |
190 | /// |
191 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph) |
192 | #[inline ] |
193 | #[target_feature (enable = "avx512fp16" )] |
194 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
195 | pub fn _mm512_setr_ph( |
196 | e0: f16, |
197 | e1: f16, |
198 | e2: f16, |
199 | e3: f16, |
200 | e4: f16, |
201 | e5: f16, |
202 | e6: f16, |
203 | e7: f16, |
204 | e8: f16, |
205 | e9: f16, |
206 | e10: f16, |
207 | e11: f16, |
208 | e12: f16, |
209 | e13: f16, |
210 | e14: f16, |
211 | e15: f16, |
212 | e16: f16, |
213 | e17: f16, |
214 | e18: f16, |
215 | e19: f16, |
216 | e20: f16, |
217 | e21: f16, |
218 | e22: f16, |
219 | e23: f16, |
220 | e24: f16, |
221 | e25: f16, |
222 | e26: f16, |
223 | e27: f16, |
224 | e28: f16, |
225 | e29: f16, |
226 | e30: f16, |
227 | e31: f16, |
228 | ) -> __m512h { |
229 | __m512h([ |
230 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, |
231 | e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, |
232 | ]) |
233 | } |
234 | |
235 | /// Return vector of type __m128h with all elements set to zero. |
236 | /// |
237 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph) |
238 | #[inline ] |
239 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
241 | pub fn _mm_setzero_ph() -> __m128h { |
    unsafe { transmute(f16x8::ZERO) }
243 | } |
244 | |
245 | /// Return vector of type __m256h with all elements set to zero. |
246 | /// |
247 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph) |
248 | #[inline ] |
249 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
250 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
251 | pub fn _mm256_setzero_ph() -> __m256h { |
    unsafe { transmute(f16x16::ZERO) }
253 | } |
254 | |
255 | /// Return vector of type __m512h with all elements set to zero. |
256 | /// |
257 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph) |
258 | #[inline ] |
259 | #[target_feature (enable = "avx512fp16" )] |
260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
261 | pub fn _mm512_setzero_ph() -> __m512h { |
    unsafe { transmute(f16x32::ZERO) }
263 | } |
264 | |
265 | /// Return vector of type `__m128h` with undefined elements. In practice, this returns the all-zero |
266 | /// vector. |
267 | /// |
268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph) |
269 | #[inline ] |
270 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
271 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
272 | pub fn _mm_undefined_ph() -> __m128h { |
    unsafe { transmute(f16x8::ZERO) }
274 | } |
275 | |
276 | /// Return vector of type `__m256h` with undefined elements. In practice, this returns the all-zero |
277 | /// vector. |
278 | /// |
279 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph) |
280 | #[inline ] |
281 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
282 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
283 | pub fn _mm256_undefined_ph() -> __m256h { |
    unsafe { transmute(f16x16::ZERO) }
285 | } |
286 | |
287 | /// Return vector of type `__m512h` with undefined elements. In practice, this returns the all-zero |
288 | /// vector. |
289 | /// |
290 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph) |
291 | #[inline ] |
292 | #[target_feature (enable = "avx512fp16" )] |
293 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
294 | pub fn _mm512_undefined_ph() -> __m512h { |
    unsafe { transmute(f16x32::ZERO) }
296 | } |
297 | |
298 | /// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and |
299 | /// does not generate any instructions, thus it has zero latency. |
300 | /// |
301 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph) |
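///
/// A minimal sketch of the reinterpreting cast (assumes a nightly toolchain with the `f16` type
/// and a caller that enables `avx512fp16`; `_mm_set1_pd` is the usual SSE2 intrinsic):
///
/// ```ignore
/// // Only the type changes: the 128 bits are reinterpreted as 8 `f16` lanes,
/// // no value conversion is performed.
/// let d = _mm_set1_pd(1.0);
/// let h = _mm_castpd_ph(d);
/// ```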
302 | #[inline ] |
303 | #[target_feature (enable = "avx512fp16" )] |
304 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
305 | pub fn _mm_castpd_ph(a: __m128d) -> __m128h { |
    unsafe { transmute(a) }
307 | } |
308 | |
309 | /// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and |
310 | /// does not generate any instructions, thus it has zero latency. |
311 | /// |
312 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph) |
313 | #[inline ] |
314 | #[target_feature (enable = "avx512fp16" )] |
315 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
316 | pub fn _mm256_castpd_ph(a: __m256d) -> __m256h { |
    unsafe { transmute(a) }
318 | } |
319 | |
320 | /// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and |
321 | /// does not generate any instructions, thus it has zero latency. |
322 | /// |
323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph) |
324 | #[inline ] |
325 | #[target_feature (enable = "avx512fp16" )] |
326 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
327 | pub fn _mm512_castpd_ph(a: __m512d) -> __m512h { |
    unsafe { transmute(a) }
329 | } |
330 | |
331 | /// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and |
332 | /// does not generate any instructions, thus it has zero latency. |
333 | /// |
334 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd) |
335 | #[inline ] |
336 | #[target_feature (enable = "avx512fp16" )] |
337 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
338 | pub fn _mm_castph_pd(a: __m128h) -> __m128d { |
    unsafe { transmute(a) }
340 | } |
341 | |
342 | /// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and |
343 | /// does not generate any instructions, thus it has zero latency. |
344 | /// |
345 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd) |
346 | #[inline ] |
347 | #[target_feature (enable = "avx512fp16" )] |
348 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
349 | pub fn _mm256_castph_pd(a: __m256h) -> __m256d { |
    unsafe { transmute(a) }
351 | } |
352 | |
353 | /// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and |
354 | /// does not generate any instructions, thus it has zero latency. |
355 | /// |
356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd) |
357 | #[inline ] |
358 | #[target_feature (enable = "avx512fp16" )] |
359 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
360 | pub fn _mm512_castph_pd(a: __m512h) -> __m512d { |
    unsafe { transmute(a) }
362 | } |
363 | |
364 | /// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and |
365 | /// does not generate any instructions, thus it has zero latency. |
366 | /// |
367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph) |
368 | #[inline ] |
369 | #[target_feature (enable = "avx512fp16" )] |
370 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
371 | pub fn _mm_castps_ph(a: __m128) -> __m128h { |
    unsafe { transmute(a) }
373 | } |
374 | |
375 | /// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and |
376 | /// does not generate any instructions, thus it has zero latency. |
377 | /// |
378 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph) |
379 | #[inline ] |
380 | #[target_feature (enable = "avx512fp16" )] |
381 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
382 | pub fn _mm256_castps_ph(a: __m256) -> __m256h { |
    unsafe { transmute(a) }
384 | } |
385 | |
386 | /// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and |
387 | /// does not generate any instructions, thus it has zero latency. |
388 | /// |
389 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph) |
390 | #[inline ] |
391 | #[target_feature (enable = "avx512fp16" )] |
392 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
393 | pub fn _mm512_castps_ph(a: __m512) -> __m512h { |
    unsafe { transmute(a) }
395 | } |
396 | |
397 | /// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and |
398 | /// does not generate any instructions, thus it has zero latency. |
399 | /// |
400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps) |
401 | #[inline ] |
402 | #[target_feature (enable = "avx512fp16" )] |
403 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
404 | pub fn _mm_castph_ps(a: __m128h) -> __m128 { |
    unsafe { transmute(a) }
406 | } |
407 | |
408 | /// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and |
409 | /// does not generate any instructions, thus it has zero latency. |
410 | /// |
411 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps) |
412 | #[inline ] |
413 | #[target_feature (enable = "avx512fp16" )] |
414 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
415 | pub fn _mm256_castph_ps(a: __m256h) -> __m256 { |
    unsafe { transmute(a) }
417 | } |
418 | |
419 | /// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and |
420 | /// does not generate any instructions, thus it has zero latency. |
421 | /// |
422 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps) |
423 | #[inline ] |
424 | #[target_feature (enable = "avx512fp16" )] |
425 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
426 | pub fn _mm512_castph_ps(a: __m512h) -> __m512 { |
    unsafe { transmute(a) }
428 | } |
429 | |
430 | /// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and |
431 | /// does not generate any instructions, thus it has zero latency. |
432 | /// |
433 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph) |
434 | #[inline ] |
435 | #[target_feature (enable = "avx512fp16" )] |
436 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
437 | pub fn _mm_castsi128_ph(a: __m128i) -> __m128h { |
    unsafe { transmute(a) }
439 | } |
440 | |
441 | /// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and |
442 | /// does not generate any instructions, thus it has zero latency. |
443 | /// |
444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph) |
445 | #[inline ] |
446 | #[target_feature (enable = "avx512fp16" )] |
447 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
448 | pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h { |
    unsafe { transmute(a) }
450 | } |
451 | |
452 | /// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and |
453 | /// does not generate any instructions, thus it has zero latency. |
454 | /// |
455 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph) |
456 | #[inline ] |
457 | #[target_feature (enable = "avx512fp16" )] |
458 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
459 | pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h { |
    unsafe { transmute(a) }
461 | } |
462 | |
463 | /// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and |
464 | /// does not generate any instructions, thus it has zero latency. |
465 | /// |
466 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128) |
467 | #[inline ] |
468 | #[target_feature (enable = "avx512fp16" )] |
469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
470 | pub fn _mm_castph_si128(a: __m128h) -> __m128i { |
    unsafe { transmute(a) }
472 | } |
473 | |
474 | /// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and |
475 | /// does not generate any instructions, thus it has zero latency. |
476 | /// |
477 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256) |
478 | #[inline ] |
479 | #[target_feature (enable = "avx512fp16" )] |
480 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
481 | pub fn _mm256_castph_si256(a: __m256h) -> __m256i { |
    unsafe { transmute(a) }
483 | } |
484 | |
485 | /// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and |
486 | /// does not generate any instructions, thus it has zero latency. |
487 | /// |
488 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512) |
489 | #[inline ] |
490 | #[target_feature (enable = "avx512fp16" )] |
491 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
492 | pub fn _mm512_castph_si512(a: __m512h) -> __m512i { |
    unsafe { transmute(a) }
494 | } |
495 | |
496 | /// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and |
497 | /// does not generate any instructions, thus it has zero latency. |
498 | /// |
499 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128) |
500 | #[inline ] |
501 | #[target_feature (enable = "avx512fp16" )] |
502 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
503 | pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h { |
504 | unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } |
505 | } |
506 | |
507 | /// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and |
508 | /// does not generate any instructions, thus it has zero latency. |
509 | /// |
510 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128) |
511 | #[inline ] |
512 | #[target_feature (enable = "avx512fp16" )] |
513 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
514 | pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h { |
515 | unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } |
516 | } |
517 | |
518 | /// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and |
519 | /// does not generate any instructions, thus it has zero latency. |
520 | /// |
521 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256) |
522 | #[inline ] |
523 | #[target_feature (enable = "avx512fp16" )] |
524 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
525 | pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h { |
526 | unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } |
527 | } |
528 | |
529 | /// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined. |
530 | /// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, |
531 | /// but most of the time it does not generate any instructions. |
532 | /// |
533 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256) |
534 | #[inline ] |
535 | #[target_feature (enable = "avx512fp16" )] |
536 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
537 | pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h { |
538 | unsafe { |
539 | simd_shuffle!( |
540 | a, |
541 | _mm_undefined_ph(), |
542 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] |
543 | ) |
544 | } |
545 | } |
546 | |
547 | /// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined. |
548 | /// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, |
549 | /// but most of the time it does not generate any instructions. |
550 | /// |
551 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512) |
552 | #[inline ] |
553 | #[target_feature (enable = "avx512fp16" )] |
554 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
555 | pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h { |
556 | unsafe { |
557 | simd_shuffle!( |
558 | a, |
559 | _mm_undefined_ph(), |
560 | [ |
561 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
562 | 8, 8, 8, 8 |
563 | ] |
564 | ) |
565 | } |
566 | } |
567 | |
568 | /// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined. |
569 | /// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction, |
570 | /// but most of the time it does not generate any instructions. |
571 | /// |
572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512) |
573 | #[inline ] |
574 | #[target_feature (enable = "avx512fp16" )] |
575 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
576 | pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h { |
577 | unsafe { |
578 | simd_shuffle!( |
579 | a, |
580 | _mm256_undefined_ph(), |
581 | [ |
582 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, |
583 | 16, 16, 16, 16, 16, 16, 16, 16, 16 |
584 | ] |
585 | ) |
586 | } |
587 | } |
588 | |
/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
592 | /// |
593 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256) |
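///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let lo = _mm_set1_ph(2.0);
/// // The low 8 lanes come from `lo`; the high 8 lanes are guaranteed to be zero,
/// // unlike `_mm256_castph128_ph256`, whose upper lanes are undefined.
/// let wide = _mm256_zextph128_ph256(lo);
/// ```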
594 | #[inline ] |
595 | #[target_feature (enable = "avx512fp16" )] |
596 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
597 | pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h { |
598 | unsafe { |
599 | simd_shuffle!( |
600 | a, |
601 | _mm_setzero_ph(), |
602 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] |
603 | ) |
604 | } |
605 | } |
606 | |
607 | /// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed. |
608 | /// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate |
609 | /// any instructions. |
610 | /// |
611 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512) |
612 | #[inline ] |
613 | #[target_feature (enable = "avx512fp16" )] |
614 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
615 | pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h { |
616 | unsafe { |
617 | simd_shuffle!( |
618 | a, |
619 | _mm256_setzero_ph(), |
620 | [ |
621 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, |
622 | 16, 16, 16, 16, 16, 16, 16, 16, 16 |
623 | ] |
624 | ) |
625 | } |
626 | } |
627 | |
628 | /// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed. |
629 | /// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate |
630 | /// any instructions. |
631 | /// |
632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512) |
633 | #[inline ] |
634 | #[target_feature (enable = "avx512fp16" )] |
635 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
636 | pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h { |
637 | unsafe { |
638 | simd_shuffle!( |
639 | a, |
640 | _mm_setzero_ph(), |
641 | [ |
642 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
643 | 8, 8, 8, 8 |
644 | ] |
645 | ) |
646 | } |
647 | } |
648 | |
649 | macro_rules! cmp_asm { // FIXME: use LLVM intrinsics |
650 | ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{ |
651 | let dst: $mask_type; |
652 | asm!( |
653 | "vcmpph {k}, {a}, {b}, {imm8}" , |
654 | k = lateout(kreg) dst, |
655 | a = in($reg) $a, |
656 | b = in($reg) $b, |
657 | imm8 = const IMM5, |
658 | options(pure, nomem, nostack) |
659 | ); |
660 | dst |
661 | }}; |
662 | ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{ |
663 | let dst: $mask_type; |
664 | asm!( |
665 | "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}" , |
666 | k = lateout(kreg) dst, |
667 | mask = in(kreg) $mask, |
668 | a = in($reg) $a, |
669 | b = in($reg) $b, |
670 | imm8 = const IMM5, |
671 | options(pure, nomem, nostack) |
672 | ); |
673 | dst |
674 | }}; |
675 | } |
676 | |
677 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
678 | /// operand specified by imm8, and store the results in mask vector k. |
679 | /// |
680 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask) |
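///
/// A minimal sketch of a less-than comparison (assumes a nightly toolchain with the `f16` type
/// and a caller that enables `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0);
/// // Bit i of the mask is set where a[i] < b[i].
/// let m = _mm_cmp_ph_mask::<_CMP_LT_OS>(a, b);
/// assert_eq!(m, 0b1100_1100);
/// ```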
681 | #[inline ] |
682 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
683 | #[rustc_legacy_const_generics (2)] |
684 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
685 | pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 { |
686 | unsafe { |
687 | static_assert_uimm_bits!(IMM5, 5); |
688 | cmp_asm!(__mmask8, xmm_reg, a, b) |
689 | } |
690 | } |
691 | |
692 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
694 | /// zeroed out when the corresponding mask bit is not set). |
695 | /// |
696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask) |
697 | #[inline ] |
698 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
699 | #[rustc_legacy_const_generics (3)] |
700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
701 | pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { |
702 | unsafe { |
703 | static_assert_uimm_bits!(IMM5, 5); |
704 | cmp_asm!(__mmask8, k1, xmm_reg, a, b) |
705 | } |
706 | } |
707 | |
708 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
709 | /// operand specified by imm8, and store the results in mask vector k. |
710 | /// |
711 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask) |
712 | #[inline ] |
713 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
714 | #[rustc_legacy_const_generics (2)] |
715 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
716 | pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 { |
717 | unsafe { |
718 | static_assert_uimm_bits!(IMM5, 5); |
719 | cmp_asm!(__mmask16, ymm_reg, a, b) |
720 | } |
721 | } |
722 | |
723 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
725 | /// zeroed out when the corresponding mask bit is not set). |
726 | /// |
727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask) |
728 | #[inline ] |
729 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
730 | #[rustc_legacy_const_generics (3)] |
731 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
732 | pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>( |
733 | k1: __mmask16, |
734 | a: __m256h, |
735 | b: __m256h, |
736 | ) -> __mmask16 { |
737 | unsafe { |
738 | static_assert_uimm_bits!(IMM5, 5); |
739 | cmp_asm!(__mmask16, k1, ymm_reg, a, b) |
740 | } |
741 | } |
742 | |
743 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
744 | /// operand specified by imm8, and store the results in mask vector k. |
745 | /// |
746 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask) |
747 | #[inline ] |
748 | #[target_feature (enable = "avx512fp16" )] |
749 | #[rustc_legacy_const_generics (2)] |
750 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
751 | pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 { |
752 | unsafe { |
753 | static_assert_uimm_bits!(IMM5, 5); |
754 | cmp_asm!(__mmask32, zmm_reg, a, b) |
755 | } |
756 | } |
757 | |
758 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
760 | /// zeroed out when the corresponding mask bit is not set). |
761 | /// |
762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask) |
763 | #[inline ] |
764 | #[target_feature (enable = "avx512fp16" )] |
765 | #[rustc_legacy_const_generics (3)] |
766 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
767 | pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>( |
768 | k1: __mmask32, |
769 | a: __m512h, |
770 | b: __m512h, |
771 | ) -> __mmask32 { |
772 | unsafe { |
773 | static_assert_uimm_bits!(IMM5, 5); |
774 | cmp_asm!(__mmask32, k1, zmm_reg, a, b) |
775 | } |
776 | } |
777 | |
778 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
779 | /// operand specified by imm8, and store the results in mask vector k. |
780 | /// |
781 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
782 | /// |
783 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask) |
784 | #[inline ] |
785 | #[target_feature (enable = "avx512fp16" )] |
786 | #[rustc_legacy_const_generics (2, 3)] |
787 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
788 | pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>( |
789 | a: __m512h, |
790 | b: __m512h, |
791 | ) -> __mmask32 { |
792 | unsafe { |
793 | static_assert_uimm_bits!(IMM5, 5); |
794 | static_assert_sae!(SAE); |
795 | if SAE == _MM_FROUND_NO_EXC { |
796 | let dst: __mmask32; |
797 | asm!( |
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
799 | k = lateout(kreg) dst, |
800 | a = in(zmm_reg) a, |
801 | b = in(zmm_reg) b, |
802 | imm8 = const IMM5, |
803 | options(pure, nomem, nostack) |
804 | ); |
805 | dst |
806 | } else { |
807 | cmp_asm!(__mmask32, zmm_reg, a, b) |
808 | } |
809 | } |
810 | } |
811 | |
812 | /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison |
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
814 | /// zeroed out when the corresponding mask bit is not set). |
815 | /// |
816 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
817 | /// |
818 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask) |
819 | #[inline ] |
820 | #[target_feature (enable = "avx512fp16" )] |
821 | #[rustc_legacy_const_generics (3, 4)] |
822 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
823 | pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>( |
824 | k1: __mmask32, |
825 | a: __m512h, |
826 | b: __m512h, |
827 | ) -> __mmask32 { |
828 | unsafe { |
829 | static_assert_uimm_bits!(IMM5, 5); |
830 | static_assert_sae!(SAE); |
831 | if SAE == _MM_FROUND_NO_EXC { |
832 | let dst: __mmask32; |
833 | asm!( |
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
835 | k = lateout(kreg) dst, |
836 | k1 = in(kreg) k1, |
837 | a = in(zmm_reg) a, |
838 | b = in(zmm_reg) b, |
839 | imm8 = const IMM5, |
840 | options(pure, nomem, nostack) |
841 | ); |
842 | dst |
843 | } else { |
844 | cmp_asm!(__mmask32, k1, zmm_reg, a, b) |
845 | } |
846 | } |
847 | } |
848 | |
849 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
850 | /// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by |
851 | /// passing _MM_FROUND_NO_EXC in the sae parameter. |
852 | /// |
853 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask) |
854 | #[inline ] |
855 | #[target_feature (enable = "avx512fp16" )] |
856 | #[rustc_legacy_const_generics (2, 3)] |
857 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
858 | pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 { |
859 | static_assert_uimm_bits!(IMM5, 5); |
860 | static_assert_sae!(SAE); |
    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
862 | } |
863 | |
864 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
865 | /// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be |
866 | /// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
867 | /// |
868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask) |
869 | #[inline ] |
870 | #[target_feature (enable = "avx512fp16" )] |
871 | #[rustc_legacy_const_generics (3, 4)] |
872 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
873 | pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>( |
874 | k1: __mmask8, |
875 | a: __m128h, |
876 | b: __m128h, |
877 | ) -> __mmask8 { |
878 | unsafe { |
879 | static_assert_uimm_bits!(IMM5, 5); |
880 | static_assert_sae!(SAE); |
        vcmpsh(a, b, IMM5, k1, SAE)
882 | } |
883 | } |
884 | |
885 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
886 | /// operand specified by imm8, and store the result in mask vector k. |
887 | /// |
888 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask) |
889 | #[inline ] |
890 | #[target_feature (enable = "avx512fp16" )] |
891 | #[rustc_legacy_const_generics (2)] |
892 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
893 | pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 { |
894 | static_assert_uimm_bits!(IMM5, 5); |
895 | _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b) |
896 | } |
897 | |
898 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
899 | /// operand specified by imm8, and store the result in mask vector k using zeromask k1. |
900 | /// |
901 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask) |
902 | #[inline ] |
903 | #[target_feature (enable = "avx512fp16" )] |
904 | #[rustc_legacy_const_generics (3)] |
905 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
906 | pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { |
907 | static_assert_uimm_bits!(IMM5, 5); |
908 | _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b) |
909 | } |
910 | |
911 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
912 | /// operand specified by imm8, and return the boolean result (0 or 1). |
913 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
914 | /// |
915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh) |
916 | #[inline ] |
917 | #[target_feature (enable = "avx512fp16" )] |
918 | #[rustc_legacy_const_generics (2, 3)] |
919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
920 | pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 { |
921 | unsafe { |
922 | static_assert_uimm_bits!(IMM5, 5); |
923 | static_assert_sae!(SAE); |
924 | vcomish(a, b, IMM5, SAE) |
925 | } |
926 | } |
927 | |
928 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison |
929 | /// operand specified by imm8, and return the boolean result (0 or 1). |
930 | /// |
931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh) |
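///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let a = _mm_set_sh(1.0);
/// let b = _mm_set_sh(1.0);
/// // Only lane 0 is compared; the result is 1 if the predicate holds, 0 otherwise.
/// assert_eq!(_mm_comi_sh::<_CMP_EQ_OS>(a, b), 1);
/// ```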
932 | #[inline ] |
933 | #[target_feature (enable = "avx512fp16" )] |
934 | #[rustc_legacy_const_generics (2)] |
935 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
936 | pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 { |
937 | static_assert_uimm_bits!(IMM5, 5); |
938 | _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b) |
939 | } |
940 | |
941 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return |
942 | /// the boolean result (0 or 1). |
943 | /// |
944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh) |
945 | #[inline ] |
946 | #[target_feature (enable = "avx512fp16" )] |
947 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
948 | pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 { |
949 | _mm_comi_sh::<_CMP_EQ_OS>(a, b) |
950 | } |
951 | |
952 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal, |
953 | /// and return the boolean result (0 or 1). |
954 | /// |
955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh) |
956 | #[inline ] |
957 | #[target_feature (enable = "avx512fp16" )] |
958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
959 | pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 { |
960 | _mm_comi_sh::<_CMP_GE_OS>(a, b) |
961 | } |
962 | |
963 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return |
964 | /// the boolean result (0 or 1). |
965 | /// |
966 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh) |
967 | #[inline ] |
968 | #[target_feature (enable = "avx512fp16" )] |
969 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
970 | pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 { |
971 | _mm_comi_sh::<_CMP_GT_OS>(a, b) |
972 | } |
973 | |
974 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and |
975 | /// return the boolean result (0 or 1). |
976 | /// |
977 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh) |
978 | #[inline ] |
979 | #[target_feature (enable = "avx512fp16" )] |
980 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
981 | pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 { |
982 | _mm_comi_sh::<_CMP_LE_OS>(a, b) |
983 | } |
984 | |
985 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return |
986 | /// the boolean result (0 or 1). |
987 | /// |
988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh) |
989 | #[inline ] |
990 | #[target_feature (enable = "avx512fp16" )] |
991 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
992 | pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 { |
993 | _mm_comi_sh::<_CMP_LT_OS>(a, b) |
994 | } |
995 | |
996 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return |
997 | /// the boolean result (0 or 1). |
998 | /// |
999 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh) |
1000 | #[inline ] |
1001 | #[target_feature (enable = "avx512fp16" )] |
1002 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1003 | pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 { |
1004 | _mm_comi_sh::<_CMP_NEQ_OS>(a, b) |
1005 | } |
1006 | |
1007 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and |
1008 | /// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1009 | /// |
1010 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh) |
1011 | #[inline ] |
1012 | #[target_feature (enable = "avx512fp16" )] |
1013 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1014 | pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 { |
1015 | _mm_comi_sh::<_CMP_EQ_OQ>(a, b) |
1016 | } |
1017 | |
1018 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal, |
1019 | /// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1020 | /// |
1021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh) |
1022 | #[inline ] |
1023 | #[target_feature (enable = "avx512fp16" )] |
1024 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1025 | pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 { |
1026 | _mm_comi_sh::<_CMP_GE_OQ>(a, b) |
1027 | } |
1028 | |
1029 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return |
1030 | /// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1031 | /// |
1032 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh) |
1033 | #[inline ] |
1034 | #[target_feature (enable = "avx512fp16" )] |
1035 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1036 | pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 { |
1037 | _mm_comi_sh::<_CMP_GT_OQ>(a, b) |
1038 | } |
1039 | |
1040 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and |
1041 | /// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1042 | /// |
1043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh) |
1044 | #[inline ] |
1045 | #[target_feature (enable = "avx512fp16" )] |
1046 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1047 | pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 { |
1048 | _mm_comi_sh::<_CMP_LE_OQ>(a, b) |
1049 | } |
1050 | |
1051 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return |
1052 | /// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1053 | /// |
1054 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh) |
1055 | #[inline ] |
1056 | #[target_feature (enable = "avx512fp16" )] |
1057 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1058 | pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 { |
1059 | _mm_comi_sh::<_CMP_LT_OQ>(a, b) |
1060 | } |
1061 | |
1062 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return |
1063 | /// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. |
1064 | /// |
1065 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh) |
1066 | #[inline ] |
1067 | #[target_feature (enable = "avx512fp16" )] |
1068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1069 | pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 { |
1070 | _mm_comi_sh::<_CMP_NEQ_OQ>(a, b) |
1071 | } |
1072 | |
1073 | /// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into |
1074 | /// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated. |
1075 | /// |
1076 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph) |
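///
/// A minimal sketch of an aligned load (assumes a nightly toolchain with the `f16` type and a
/// caller that enables `avx512fp16` and `avx512vl`; `Aligned` is a hypothetical helper used
/// only to satisfy the 16-byte alignment requirement):
///
/// ```ignore
/// #[repr(align(16))]
/// struct Aligned([f16; 8]);
///
/// let data = Aligned([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
/// // `data` is 16-byte aligned, so the aligned load is permitted.
/// let v = unsafe { _mm_load_ph(data.0.as_ptr()) };
/// ```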
1077 | #[inline ] |
1078 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1079 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1080 | pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h { |
1081 | *mem_addr.cast() |
1082 | } |
1083 | |
1084 | /// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into |
1085 | /// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated. |
1086 | /// |
1087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph) |
1088 | #[inline ] |
1089 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1091 | pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h { |
1092 | *mem_addr.cast() |
1093 | } |
1094 | |
1095 | /// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into |
1096 | /// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated. |
1097 | /// |
1098 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph) |
1099 | #[inline ] |
1100 | #[target_feature (enable = "avx512fp16" )] |
1101 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1102 | pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h { |
1103 | *mem_addr.cast() |
1104 | } |
1105 | |
1106 | /// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector, |
1107 | /// and zero the upper elements |
1108 | /// |
1109 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh) |
1110 | #[inline ] |
1111 | #[target_feature (enable = "avx512fp16" )] |
1112 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1113 | pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h { |
1114 | _mm_set_sh(*mem_addr) |
1115 | } |
1116 | |
1117 | /// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector |
1118 | /// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements. |
1119 | /// |
1120 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh) |
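///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let src = _mm_set1_ph(5.0);
/// let x: f16 = 9.0;
/// // Mask bit 0 is clear, so lane 0 is taken from `src` rather than from memory;
/// // the upper 7 lanes are zeroed either way.
/// let v = unsafe { _mm_mask_load_sh(src, 0, &x) };
/// ```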
1121 | #[inline ] |
1122 | #[target_feature (enable = "avx512fp16" )] |
1123 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1124 | pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h { |
1125 | let mut dst: __m128h = src; |
1126 | asm!( |
1127 | vpl!("vmovsh {dst}{{{k}}}" ), |
1128 | dst = inout(xmm_reg) dst, |
1129 | k = in(kreg) k, |
1130 | p = in(reg) mem_addr, |
1131 | options(pure, readonly, nostack, preserves_flags) |
1132 | ); |
1133 | dst |
1134 | } |
1135 | |
1136 | /// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector |
1137 | /// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements. |
1138 | /// |
1139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh) |
1140 | #[inline ] |
1141 | #[target_feature (enable = "avx512fp16" )] |
1142 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1143 | pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h { |
1144 | let mut dst: __m128h; |
1145 | asm!( |
1146 | vpl!("vmovsh {dst}{{{k}}}{{z}}" ), |
1147 | dst = out(xmm_reg) dst, |
1148 | k = in(kreg) k, |
1149 | p = in(reg) mem_addr, |
1150 | options(pure, readonly, nostack, preserves_flags) |
1151 | ); |
1152 | dst |
1153 | } |
1154 | |
1155 | /// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into |
1156 | /// a new vector. The address does not need to be aligned to any particular boundary. |
1157 | /// |
1158 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph) |
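///
/// A minimal sketch of an unaligned load (assumes a nightly toolchain with the `f16` type and a
/// caller that enables `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let src: [f16; 8] = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
/// // No particular alignment is required for the unaligned load.
/// let v = unsafe { _mm_loadu_ph(src.as_ptr()) };
/// ```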
1159 | #[inline ] |
1160 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1161 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1162 | pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h { |
    ptr::read_unaligned(mem_addr.cast())
1164 | } |
1165 | |
1166 | /// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into |
1167 | /// a new vector. The address does not need to be aligned to any particular boundary. |
1168 | /// |
1169 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph) |
1170 | #[inline ] |
1171 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1173 | pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h { |
    ptr::read_unaligned(mem_addr.cast())
1175 | } |
1176 | |
1177 | /// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into |
1178 | /// a new vector. The address does not need to be aligned to any particular boundary. |
1179 | /// |
1180 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph) |
1181 | #[inline ] |
1182 | #[target_feature (enable = "avx512fp16" )] |
1183 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1184 | pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h { |
    ptr::read_unaligned(mem_addr.cast())
1186 | } |
1187 | |
1188 | /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst |
1189 | /// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper |
1190 | /// 7 packed elements from a to the upper elements of dst. |
1191 | /// |
1192 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh) |
1193 | #[inline ] |
1194 | #[target_feature (enable = "avx512fp16" )] |
1195 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1196 | pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1197 | unsafe { |
1198 | let mut mov: f16 = simd_extract!(src, 0); |
1199 | if (k & 1) != 0 { |
1200 | mov = simd_extract!(b, 0); |
1201 | } |
1202 | simd_insert!(a, 0, mov) |
1203 | } |
1204 | } |
1205 | |
1206 | /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst |
1207 | /// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
1208 | /// elements from a to the upper elements of dst. |
1209 | /// |
1210 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh) |
1211 | #[inline ] |
1212 | #[target_feature (enable = "avx512fp16" )] |
1213 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1214 | pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1215 | unsafe { |
1216 | let mut mov: f16 = 0.; |
1217 | if (k & 1) != 0 { |
1218 | mov = simd_extract!(b, 0); |
1219 | } |
1220 | simd_insert!(a, 0, mov) |
1221 | } |
1222 | } |
1223 | |
1224 | /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst, |
1225 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
1226 | /// |
1227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh) |
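///
/// A minimal usage sketch (assumes a nightly toolchain with the `f16` type and a caller that
/// enables `avx512fp16`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // Lane 0 of the result comes from `b` (2.0); lanes 1..=7 come from `a` (1.0).
/// let r = _mm_move_sh(a, b);
/// ```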
1228 | #[inline ] |
1229 | #[target_feature (enable = "avx512fp16" )] |
1230 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1231 | pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h { |
1232 | unsafe { |
1233 | let mov: f16 = simd_extract!(b, 0); |
1234 | simd_insert!(a, 0, mov) |
1235 | } |
1236 | } |
1237 | |
1238 | /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. |
1239 | /// The address must be aligned to 16 bytes or a general-protection exception may be generated. |
1240 | /// |
1241 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph) |
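///
/// # Example
///
/// A minimal sketch (not a compiled doctest) showing one way to satisfy the 16-byte
/// alignment requirement; `Aligned` and `store_demo` are illustrative names.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned([f16; 8]);
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn store_demo(v: __m128h) -> Aligned {
///     let mut out = Aligned([0.0; 8]);
///     // The destination is 16-byte aligned, as the aligned store requires.
///     unsafe { _mm_store_ph(out.0.as_mut_ptr(), v) };
///     out
/// }
/// ```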
1242 | #[inline ] |
1243 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1244 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1245 | pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) { |
1246 | *mem_addr.cast() = a; |
1247 | } |
1248 | |
1249 | /// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. |
1250 | /// The address must be aligned to 32 bytes or a general-protection exception may be generated. |
1251 | /// |
1252 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph) |
1253 | #[inline ] |
1254 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1256 | pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) { |
1257 | *mem_addr.cast() = a; |
1258 | } |
1259 | |
1260 | /// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. |
1261 | /// The address must be aligned to 64 bytes or a general-protection exception may be generated. |
1262 | /// |
1263 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph) |
1264 | #[inline ] |
1265 | #[target_feature (enable = "avx512fp16" )] |
1266 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1267 | pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) { |
1268 | *mem_addr.cast() = a; |
1269 | } |
1270 | |
1271 | /// Store the lower half-precision (16-bit) floating-point element from a into memory. |
1272 | /// |
1273 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh) |
1274 | #[inline ] |
1275 | #[target_feature (enable = "avx512fp16" )] |
1276 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1277 | pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) { |
1278 | *mem_addr = simd_extract!(a, 0); |
1279 | } |
1280 | |
1281 | /// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k (the element is not stored when mask bit 0 is not set). |
1282 | /// |
1283 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh) |
1284 | #[inline ] |
1285 | #[target_feature (enable = "avx512fp16" )] |
1286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1287 | pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) { |
1288 | asm!( |
1289 | vps!("vmovdqu16", "{{{k}}}, {src}"), |
1290 | p = in(reg) mem_addr, |
1291 | k = in(kreg) k, |
1292 | src = in(xmm_reg) a, |
1293 | options(nostack, preserves_flags) |
1294 | ); |
1295 | } |
1296 | |
1297 | /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. |
1298 | /// The address does not need to be aligned to any particular boundary. |
1299 | /// |
1300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph) |
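///
/// # Example
///
/// A minimal sketch (not a compiled doctest); `store_unaligned_demo` is a hypothetical
/// helper under the usual nightly/CPU-feature assumptions.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn store_unaligned_demo(v: __m128h) -> [f16; 8] {
///     let mut out: [f16; 8] = [0.0; 8];
///     // No alignment requirement on the destination buffer.
///     unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
///     out
/// }
/// ```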
1301 | #[inline ] |
1302 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1303 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1304 | pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) { |
1305 | ptr::write_unaligned(mem_addr.cast(), a); |
1306 | } |
1307 | |
1308 | /// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory. |
1309 | /// The address does not need to be aligned to any particular boundary. |
1310 | /// |
1311 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph) |
1312 | #[inline ] |
1313 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1314 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1315 | pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) { |
1316 | ptr::write_unaligned(mem_addr.cast(), a); |
1317 | } |
1318 | |
1319 | /// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory. |
1320 | /// The address does not need to be aligned to any particular boundary. |
1321 | /// |
1322 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph) |
1323 | #[inline ] |
1324 | #[target_feature (enable = "avx512fp16" )] |
1325 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1326 | pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) { |
1327 | ptr::write_unaligned(mem_addr.cast(), a); |
1328 | } |
1329 | |
1330 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1331 | /// |
1332 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph) |
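///
/// # Example
///
/// A minimal sketch (not a compiled doctest); `add_demo` is a hypothetical helper and the
/// usual nightly/CPU-feature assumptions apply.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn add_demo() -> __m128h {
///     let a = _mm_set1_ph(1.5);
///     let b = _mm_set1_ph(2.0);
///     // Every one of the 8 lanes holds 1.5 + 2.0 = 3.5.
///     _mm_add_ph(a, b)
/// }
/// ```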
1333 | #[inline ] |
1334 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1335 | #[cfg_attr (test, assert_instr(vaddph))] |
1336 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1337 | pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { |
1338 | unsafe { simd_add(a, b) } |
1339 | } |
1340 | |
1341 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1342 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1343 | /// |
1344 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph) |
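///
/// # Example
///
/// A minimal sketch (not a compiled doctest) of the writemask behaviour; `mask_add_demo`
/// is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn mask_add_demo(src: __m128h, a: __m128h, b: __m128h) -> __m128h {
///     // Lanes 0 and 2 receive a + b; every other lane is copied from `src`.
///     _mm_mask_add_ph(src, 0b0000_0101, a, b)
/// }
/// ```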
1345 | #[inline ] |
1346 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1347 | #[cfg_attr (test, assert_instr(vaddph))] |
1348 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1349 | pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1350 | unsafe { |
1351 | let r: __m128h = _mm_add_ph(a, b); |
1352 | simd_select_bitmask(k, r, src) |
1353 | } |
1354 | } |
1355 | |
1356 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1357 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1358 | /// |
1359 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph) |
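///
/// # Example
///
/// A minimal sketch (not a compiled doctest) contrasting the zeromask variant with the
/// writemask one above; `maskz_add_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn maskz_add_demo(a: __m128h, b: __m128h) -> __m128h {
///     // The low four lanes receive a + b; the high four lanes are zeroed.
///     _mm_maskz_add_ph(0b0000_1111, a, b)
/// }
/// ```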
1360 | #[inline ] |
1361 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1362 | #[cfg_attr (test, assert_instr(vaddph))] |
1363 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1364 | pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1365 | unsafe { |
1366 | let r: __m128h = _mm_add_ph(a, b); |
1367 | simd_select_bitmask(k, r, _mm_setzero_ph()) |
1368 | } |
1369 | } |
1370 | |
1371 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1372 | /// |
1373 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph) |
1374 | #[inline ] |
1375 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1376 | #[cfg_attr (test, assert_instr(vaddph))] |
1377 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1378 | pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { |
1379 | unsafe { simd_add(a, b) } |
1380 | } |
1381 | |
1382 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1383 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1384 | /// |
1385 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph) |
1386 | #[inline ] |
1387 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1388 | #[cfg_attr (test, assert_instr(vaddph))] |
1389 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1390 | pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1391 | unsafe { |
1392 | let r: __m256h = _mm256_add_ph(a, b); |
1393 | simd_select_bitmask(k, r, src) |
1394 | } |
1395 | } |
1396 | |
1397 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1398 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1399 | /// |
1400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph) |
1401 | #[inline ] |
1402 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1403 | #[cfg_attr (test, assert_instr(vaddph))] |
1404 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1405 | pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1406 | unsafe { |
1407 | let r: __m256h = _mm256_add_ph(a, b); |
1408 | simd_select_bitmask(k, r, _mm256_setzero_ph()) |
1409 | } |
1410 | } |
1411 | |
1412 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1413 | /// |
1414 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph) |
1415 | #[inline ] |
1416 | #[target_feature (enable = "avx512fp16" )] |
1417 | #[cfg_attr (test, assert_instr(vaddph))] |
1418 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1419 | pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { |
1420 | unsafe { simd_add(a, b) } |
1421 | } |
1422 | |
1423 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1424 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1425 | /// |
1426 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph) |
1427 | #[inline ] |
1428 | #[target_feature (enable = "avx512fp16" )] |
1429 | #[cfg_attr (test, assert_instr(vaddph))] |
1430 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1431 | pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1432 | unsafe { |
1433 | let r: __m512h = _mm512_add_ph(a, b); |
1434 | simd_select_bitmask(k, r, src) |
1435 | } |
1436 | } |
1437 | |
1438 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1439 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1440 | /// |
1441 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph) |
1442 | #[inline ] |
1443 | #[target_feature (enable = "avx512fp16" )] |
1444 | #[cfg_attr (test, assert_instr(vaddph))] |
1445 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1446 | pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1447 | unsafe { |
1448 | let r: __m512h = _mm512_add_ph(a, b); |
1449 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1450 | } |
1451 | } |
1452 | |
1453 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1454 | /// Rounding is done according to the rounding parameter, which can be one of: |
1455 | /// |
1456 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1457 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1458 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1459 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1460 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1461 | /// |
1462 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph) |
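///
/// # Example
///
/// A minimal sketch (not a compiled doctest) of passing an explicit rounding mode through
/// the const generic; `add_round_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn add_round_demo(a: __m512h, b: __m512h) -> __m512h {
///     // Round toward zero and suppress floating-point exceptions.
///     _mm512_add_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```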
1463 | #[inline ] |
1464 | #[target_feature (enable = "avx512fp16" )] |
1465 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1466 | #[rustc_legacy_const_generics (2)] |
1467 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1468 | pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
1469 | unsafe { |
1470 | static_assert_rounding!(ROUNDING); |
1471 | vaddph(a, b, ROUNDING) |
1472 | } |
1473 | } |
1474 | |
1475 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1476 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1477 | /// Rounding is done according to the rounding parameter, which can be one of: |
1478 | /// |
1479 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1480 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1481 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1482 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1483 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1484 | /// |
1485 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph) |
1486 | #[inline ] |
1487 | #[target_feature (enable = "avx512fp16" )] |
1488 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1489 | #[rustc_legacy_const_generics (4)] |
1490 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1491 | pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>( |
1492 | src: __m512h, |
1493 | k: __mmask32, |
1494 | a: __m512h, |
1495 | b: __m512h, |
1496 | ) -> __m512h { |
1497 | unsafe { |
1498 | static_assert_rounding!(ROUNDING); |
1499 | let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b); |
1500 | simd_select_bitmask(k, r, src) |
1501 | } |
1502 | } |
1503 | |
1504 | /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1505 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1506 | /// Rounding is done according to the rounding parameter, which can be one of: |
1507 | /// |
1508 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1509 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1510 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1511 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1512 | /// |
1513 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph) |
1514 | #[inline ] |
1515 | #[target_feature (enable = "avx512fp16" )] |
1516 | #[cfg_attr (test, assert_instr(vaddph, ROUNDING = 8))] |
1517 | #[rustc_legacy_const_generics (3)] |
1518 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1519 | pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>( |
1520 | k: __mmask32, |
1521 | a: __m512h, |
1522 | b: __m512h, |
1523 | ) -> __m512h { |
1524 | unsafe { |
1525 | static_assert_rounding!(ROUNDING); |
1526 | let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b); |
1527 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1528 | } |
1529 | } |
1530 | |
1531 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1532 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1533 | /// Rounding is done according to the rounding parameter, which can be one of: |
1534 | /// |
1535 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1536 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1537 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1538 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1539 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1540 | /// |
1541 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh) |
1542 | #[inline ] |
1543 | #[target_feature (enable = "avx512fp16" )] |
1544 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1545 | #[rustc_legacy_const_generics (2)] |
1546 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1547 | pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
1548 | static_assert_rounding!(ROUNDING); |
1549 | _mm_mask_add_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
1550 | } |
1551 | |
1552 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1553 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1554 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1555 | /// Rounding is done according to the rounding parameter, which can be one of: |
1556 | /// |
1557 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1558 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1559 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1560 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1561 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1562 | /// |
1563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh) |
1564 | #[inline ] |
1565 | #[target_feature (enable = "avx512fp16" )] |
1566 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1567 | #[rustc_legacy_const_generics (4)] |
1568 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1569 | pub fn _mm_mask_add_round_sh<const ROUNDING: i32>( |
1570 | src: __m128h, |
1571 | k: __mmask8, |
1572 | a: __m128h, |
1573 | b: __m128h, |
1574 | ) -> __m128h { |
1575 | unsafe { |
1576 | static_assert_rounding!(ROUNDING); |
1577 | vaddsh(a, b, src, k, ROUNDING) |
1578 | } |
1579 | } |
1580 | |
1581 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1582 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1583 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1584 | /// Rounding is done according to the rounding parameter, which can be one of: |
1585 | /// |
1586 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1587 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1588 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1589 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1590 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1591 | /// |
1592 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh) |
1593 | #[inline ] |
1594 | #[target_feature (enable = "avx512fp16" )] |
1595 | #[cfg_attr (test, assert_instr(vaddsh, ROUNDING = 8))] |
1596 | #[rustc_legacy_const_generics (3)] |
1597 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1598 | pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1599 | static_assert_rounding!(ROUNDING); |
1600 | _mm_mask_add_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
1601 | } |
1602 | |
1603 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1604 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1605 | /// |
1606 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh) |
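///
/// # Example
///
/// A minimal sketch (not a compiled doctest) of the scalar form; `add_sh_demo` is a
/// hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn add_sh_demo(a: __m128h, b: __m128h) -> __m128h {
///     // Lane 0 holds a[0] + b[0]; lanes 1..=7 are copied unchanged from `a`.
///     _mm_add_sh(a, b)
/// }
/// ```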
1607 | #[inline ] |
1608 | #[target_feature (enable = "avx512fp16" )] |
1609 | #[cfg_attr (test, assert_instr(vaddsh))] |
1610 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1611 | pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { |
1612 | _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
1613 | } |
1614 | |
1615 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1616 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1617 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1618 | /// |
1619 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh) |
1620 | #[inline ] |
1621 | #[target_feature (enable = "avx512fp16" )] |
1622 | #[cfg_attr (test, assert_instr(vaddsh))] |
1623 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1624 | pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1625 | _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
1626 | } |
1627 | |
1628 | /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
1629 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1630 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1631 | /// |
1632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh) |
1633 | #[inline ] |
1634 | #[target_feature (enable = "avx512fp16" )] |
1635 | #[cfg_attr (test, assert_instr(vaddsh))] |
1636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1637 | pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1638 | _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
1639 | } |
1640 | |
1641 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1642 | /// |
1643 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph) |
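///
/// # Example
///
/// A minimal sketch (not a compiled doctest) highlighting the operand order (`b` is
/// subtracted from `a`); `sub_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn sub_demo() -> __m128h {
///     let a = _mm_set1_ph(5.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane holds 5.0 - 2.0 = 3.0.
///     _mm_sub_ph(a, b)
/// }
/// ```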
1644 | #[inline ] |
1645 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1646 | #[cfg_attr (test, assert_instr(vsubph))] |
1647 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1648 | pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { |
1649 | unsafe { simd_sub(a, b) } |
1650 | } |
1651 | |
1652 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1653 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1654 | /// |
1655 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph) |
1656 | #[inline ] |
1657 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1658 | #[cfg_attr (test, assert_instr(vsubph))] |
1659 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1660 | pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1661 | unsafe { |
1662 | let r: __m128h = _mm_sub_ph(a, b); |
1663 | simd_select_bitmask(k, r, src) |
1664 | } |
1665 | } |
1666 | |
1667 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1668 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1669 | /// |
1670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph) |
1671 | #[inline ] |
1672 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1673 | #[cfg_attr (test, assert_instr(vsubph))] |
1674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1675 | pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1676 | unsafe { |
1677 | let r: __m128h = _mm_sub_ph(a, b); |
1678 | simd_select_bitmask(k, r, _mm_setzero_ph()) |
1679 | } |
1680 | } |
1681 | |
1682 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1683 | /// |
1684 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph) |
1685 | #[inline ] |
1686 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1687 | #[cfg_attr (test, assert_instr(vsubph))] |
1688 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1689 | pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { |
1690 | unsafe { simd_sub(a, b) } |
1691 | } |
1692 | |
1693 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1694 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1695 | /// |
1696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph) |
1697 | #[inline ] |
1698 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1699 | #[cfg_attr (test, assert_instr(vsubph))] |
1700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1701 | pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1702 | unsafe { |
1703 | let r: __m256h = _mm256_sub_ph(a, b); |
1704 | simd_select_bitmask(k, r, src) |
1705 | } |
1706 | } |
1707 | |
1708 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1709 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1710 | /// |
1711 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph) |
1712 | #[inline ] |
1713 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1714 | #[cfg_attr (test, assert_instr(vsubph))] |
1715 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1716 | pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
1717 | unsafe { |
1718 | let r: __m256h = _mm256_sub_ph(a, b); |
1719 | simd_select_bitmask(k, r, _mm256_setzero_ph()) |
1720 | } |
1721 | } |
1722 | |
1723 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1724 | /// |
1725 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph) |
1726 | #[inline ] |
1727 | #[target_feature (enable = "avx512fp16" )] |
1728 | #[cfg_attr (test, assert_instr(vsubph))] |
1729 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1730 | pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { |
1731 | unsafe { simd_sub(a, b) } |
1732 | } |
1733 | |
1734 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1735 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1736 | /// |
1737 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph) |
1738 | #[inline ] |
1739 | #[target_feature (enable = "avx512fp16" )] |
1740 | #[cfg_attr (test, assert_instr(vsubph))] |
1741 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1742 | pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1743 | unsafe { |
1744 | let r: __m512h = _mm512_sub_ph(a, b); |
1745 | simd_select_bitmask(k, r, src) |
1746 | } |
1747 | } |
1748 | |
1749 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1750 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1751 | /// |
1752 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph) |
1753 | #[inline ] |
1754 | #[target_feature (enable = "avx512fp16" )] |
1755 | #[cfg_attr (test, assert_instr(vsubph))] |
1756 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1757 | pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
1758 | unsafe { |
1759 | let r: __m512h = _mm512_sub_ph(a, b); |
1760 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1761 | } |
1762 | } |
1763 | |
1764 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. |
1765 | /// Rounding is done according to the rounding parameter, which can be one of: |
1766 | /// |
1767 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1768 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1769 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1770 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1771 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1772 | /// |
1773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph) |
1774 | #[inline ] |
1775 | #[target_feature (enable = "avx512fp16" )] |
1776 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1777 | #[rustc_legacy_const_generics (2)] |
1778 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1779 | pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
1780 | unsafe { |
1781 | static_assert_rounding!(ROUNDING); |
1782 | vsubph(a, b, ROUNDING) |
1783 | } |
1784 | } |
1785 | |
1786 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1787 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1788 | /// Rounding is done according to the rounding parameter, which can be one of: |
1789 | /// |
1790 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1791 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1792 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1793 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1794 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1795 | /// |
1796 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph) |
1797 | #[inline ] |
1798 | #[target_feature (enable = "avx512fp16" )] |
1799 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1800 | #[rustc_legacy_const_generics (4)] |
1801 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1802 | pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>( |
1803 | src: __m512h, |
1804 | k: __mmask32, |
1805 | a: __m512h, |
1806 | b: __m512h, |
1807 | ) -> __m512h { |
1808 | unsafe { |
1809 | static_assert_rounding!(ROUNDING); |
1810 | let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b); |
1811 | simd_select_bitmask(k, r, src) |
1812 | } |
1813 | } |
1814 | |
1815 | /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using |
1816 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1817 | /// Rounding is done according to the rounding parameter, which can be one of: |
1818 | /// |
1819 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1820 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1821 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1822 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1823 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1824 | /// |
1825 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph) |
1826 | #[inline ] |
1827 | #[target_feature (enable = "avx512fp16" )] |
1828 | #[cfg_attr (test, assert_instr(vsubph, ROUNDING = 8))] |
1829 | #[rustc_legacy_const_generics (3)] |
1830 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1831 | pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>( |
1832 | k: __mmask32, |
1833 | a: __m512h, |
1834 | b: __m512h, |
1835 | ) -> __m512h { |
1836 | unsafe { |
1837 | static_assert_rounding!(ROUNDING); |
1838 | let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b); |
1839 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
1840 | } |
1841 | } |
1842 | |
1843 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1844 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1845 | /// Rounding is done according to the rounding parameter, which can be one of: |
1846 | /// |
1847 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1848 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1849 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1850 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1851 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1852 | /// |
1853 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh) |
1854 | #[inline ] |
1855 | #[target_feature (enable = "avx512fp16" )] |
1856 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1857 | #[rustc_legacy_const_generics (2)] |
1858 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1859 | pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
1860 | static_assert_rounding!(ROUNDING); |
1861 | _mm_mask_sub_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
1862 | } |
1863 | |
1864 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1865 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1866 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1867 | /// Rounding is done according to the rounding parameter, which can be one of: |
1868 | /// |
1869 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1870 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1871 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1872 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1873 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1874 | /// |
1875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh) |
1876 | #[inline ] |
1877 | #[target_feature (enable = "avx512fp16" )] |
1878 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1879 | #[rustc_legacy_const_generics (4)] |
1880 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1881 | pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>( |
1882 | src: __m128h, |
1883 | k: __mmask8, |
1884 | a: __m128h, |
1885 | b: __m128h, |
1886 | ) -> __m128h { |
1887 | unsafe { |
1888 | static_assert_rounding!(ROUNDING); |
1889 | vsubsh(a, b, src, k, ROUNDING) |
1890 | } |
1891 | } |
1892 | |
1893 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1894 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1895 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1896 | /// Rounding is done according to the rounding parameter, which can be one of: |
1897 | /// |
1898 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
1899 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
1900 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
1901 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
1902 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
1903 | /// |
1904 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh) |
1905 | #[inline ] |
1906 | #[target_feature (enable = "avx512fp16" )] |
1907 | #[cfg_attr (test, assert_instr(vsubsh, ROUNDING = 8))] |
1908 | #[rustc_legacy_const_generics (3)] |
1909 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1910 | pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1911 | static_assert_rounding!(ROUNDING); |
1912 | _mm_mask_sub_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
1913 | } |
1914 | |
1915 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1916 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
1917 | /// |
1918 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh) |
1919 | #[inline ] |
1920 | #[target_feature (enable = "avx512fp16" )] |
1921 | #[cfg_attr (test, assert_instr(vsubsh))] |
1922 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1923 | pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { |
1924 | _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
1925 | } |
1926 | |
1927 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1928 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1929 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
1930 | /// |
1931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh) |
1932 | #[inline ] |
1933 | #[target_feature (enable = "avx512fp16" )] |
1934 | #[cfg_attr (test, assert_instr(vsubsh))] |
1935 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1936 | pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1937 | _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
1938 | } |
1939 | |
1940 | /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the |
1941 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
1942 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
1943 | /// |
1944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh) |
1945 | #[inline ] |
1946 | #[target_feature (enable = "avx512fp16" )] |
1947 | #[cfg_attr (test, assert_instr(vsubsh))] |
1948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1949 | pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1950 | _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
1951 | } |
1952 | |
1953 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1954 | /// |
1955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph) |
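///
/// # Example
///
/// A minimal sketch (not a compiled doctest); `mul_demo` is a hypothetical helper.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn mul_demo() -> __m128h {
///     let a = _mm_set1_ph(1.5);
///     let b = _mm_set1_ph(4.0);
///     // Every lane holds 1.5 * 4.0 = 6.0.
///     _mm_mul_ph(a, b)
/// }
/// ```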
1956 | #[inline ] |
1957 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1958 | #[cfg_attr (test, assert_instr(vmulph))] |
1959 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1960 | pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { |
1961 | unsafe { simd_mul(a, b) } |
1962 | } |
1963 | |
1964 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1965 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
1966 | /// |
1967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph) |
1968 | #[inline ] |
1969 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1970 | #[cfg_attr (test, assert_instr(vmulph))] |
1971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1972 | pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1973 | unsafe { |
1974 | let r: __m128h = _mm_mul_ph(a, b); |
1975 | simd_select_bitmask(k, r, src) |
1976 | } |
1977 | } |
1978 | |
1979 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
1980 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
1981 | /// |
1982 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph) |
1983 | #[inline ] |
1984 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1985 | #[cfg_attr (test, assert_instr(vmulph))] |
1986 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
1987 | pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
1988 | unsafe { |
1989 | let r: __m128h = _mm_mul_ph(a, b); |
1990 | simd_select_bitmask(k, r, _mm_setzero_ph()) |
1991 | } |
1992 | } |
1993 | |
1994 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
1995 | /// |
1996 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph) |
1997 | #[inline ] |
1998 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
1999 | #[cfg_attr (test, assert_instr(vmulph))] |
2000 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2001 | pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { |
2002 | unsafe { simd_mul(a, b) } |
2003 | } |
2004 | |
2005 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2006 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2007 | /// |
2008 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph) |
2009 | #[inline ] |
2010 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2011 | #[cfg_attr (test, assert_instr(vmulph))] |
2012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2013 | pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2014 | unsafe { |
2015 | let r: __m256h = _mm256_mul_ph(a, b); |
2016 | simd_select_bitmask(k, r, src) |
2017 | } |
2018 | } |
2019 | |
2020 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2021 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2022 | /// |
2023 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph) |
2024 | #[inline ] |
2025 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2026 | #[cfg_attr (test, assert_instr(vmulph))] |
2027 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2028 | pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2029 | unsafe { |
2030 | let r: __m256h = _mm256_mul_ph(a, b); |
2031 | simd_select_bitmask(k, r, _mm256_setzero_ph()) |
2032 | } |
2033 | } |
2034 | |
2035 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2036 | /// |
2037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph) |
2038 | #[inline ] |
2039 | #[target_feature (enable = "avx512fp16" )] |
2040 | #[cfg_attr (test, assert_instr(vmulph))] |
2041 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2042 | pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { |
2043 | unsafe { simd_mul(a, b) } |
2044 | } |
2045 | |
2046 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2047 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2048 | /// |
2049 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph) |
2050 | #[inline ] |
2051 | #[target_feature (enable = "avx512fp16" )] |
2052 | #[cfg_attr (test, assert_instr(vmulph))] |
2053 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2054 | pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2055 | unsafe { |
2056 | let r: __m512h = _mm512_mul_ph(a, b); |
2057 | simd_select_bitmask(k, r, src) |
2058 | } |
2059 | } |
2060 | |
2061 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2062 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2063 | /// |
2064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph) |
2065 | #[inline ] |
2066 | #[target_feature (enable = "avx512fp16" )] |
2067 | #[cfg_attr (test, assert_instr(vmulph))] |
2068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2069 | pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2070 | unsafe { |
2071 | let r: __m512h = _mm512_mul_ph(a, b); |
2072 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
2073 | } |
2074 | } |
2075 | |
2076 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. |
2077 | /// Rounding is done according to the rounding parameter, which can be one of: |
2078 | /// |
2079 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2080 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2081 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2082 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2083 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2084 | /// |
2085 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph) |
2086 | #[inline ] |
2087 | #[target_feature (enable = "avx512fp16" )] |
2088 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2089 | #[rustc_legacy_const_generics (2)] |
2090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2091 | pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2092 | unsafe { |
2093 | static_assert_rounding!(ROUNDING); |
2094 | vmulph(a, b, ROUNDING) |
2095 | } |
2096 | } |
2097 | |
2098 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2099 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2100 | /// Rounding is done according to the rounding parameter, which can be one of: |
2101 | /// |
2102 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2103 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2104 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2105 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2106 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2107 | /// |
2108 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph) |
2109 | #[inline ] |
2110 | #[target_feature (enable = "avx512fp16" )] |
2111 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2112 | #[rustc_legacy_const_generics (4)] |
2113 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2114 | pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>( |
2115 | src: __m512h, |
2116 | k: __mmask32, |
2117 | a: __m512h, |
2118 | b: __m512h, |
2119 | ) -> __m512h { |
2120 | unsafe { |
2121 | static_assert_rounding!(ROUNDING); |
2122 | let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b); |
2123 | simd_select_bitmask(k, r, src) |
2124 | } |
2125 | } |
2126 | |
2127 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using |
2128 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2129 | /// Rounding is done according to the rounding parameter, which can be one of: |
2130 | /// |
2131 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2132 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2133 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2134 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2135 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2136 | /// |
2137 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph) |
2138 | #[inline ] |
2139 | #[target_feature (enable = "avx512fp16" )] |
2140 | #[cfg_attr (test, assert_instr(vmulph, ROUNDING = 8))] |
2141 | #[rustc_legacy_const_generics (3)] |
2142 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2143 | pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>( |
2144 | k: __mmask32, |
2145 | a: __m512h, |
2146 | b: __m512h, |
2147 | ) -> __m512h { |
2148 | unsafe { |
2149 | static_assert_rounding!(ROUNDING); |
2150 | let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b); |
2151 | simd_select_bitmask(k, r, _mm512_setzero_ph()) |
2152 | } |
2153 | } |
2154 | |
2155 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2156 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2157 | /// Rounding is done according to the rounding parameter, which can be one of: |
2158 | /// |
2159 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2160 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2161 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2162 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2163 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2164 | /// |
2165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh) |
2166 | #[inline ] |
2167 | #[target_feature (enable = "avx512fp16" )] |
2168 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2169 | #[rustc_legacy_const_generics (2)] |
2170 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2171 | pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2172 | static_assert_rounding!(ROUNDING); |
2173 | _mm_mask_mul_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
2174 | } |
2175 | |
2176 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2177 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2178 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2179 | /// Rounding is done according to the rounding parameter, which can be one of: |
2180 | /// |
2181 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2182 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2183 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2184 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2185 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2186 | /// |
2187 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh) |
2188 | #[inline ] |
2189 | #[target_feature (enable = "avx512fp16" )] |
2190 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2191 | #[rustc_legacy_const_generics (4)] |
2192 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2193 | pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>( |
2194 | src: __m128h, |
2195 | k: __mmask8, |
2196 | a: __m128h, |
2197 | b: __m128h, |
2198 | ) -> __m128h { |
2199 | unsafe { |
2200 | static_assert_rounding!(ROUNDING); |
2201 | vmulsh(a, b, src, k, ROUNDING) |
2202 | } |
2203 | } |
2204 | |
2205 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2206 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2207 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2208 | /// Rounding is done according to the rounding parameter, which can be one of: |
2209 | /// |
2210 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2211 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2212 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2213 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2214 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2215 | /// |
2216 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh) |
2217 | #[inline ] |
2218 | #[target_feature (enable = "avx512fp16" )] |
2219 | #[cfg_attr (test, assert_instr(vmulsh, ROUNDING = 8))] |
2220 | #[rustc_legacy_const_generics (3)] |
2221 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2222 | pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2223 | static_assert_rounding!(ROUNDING); |
2224 | _mm_mask_mul_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
2225 | } |
2226 | |
2227 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2228 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2229 | /// |
2230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh) |
2231 | #[inline ] |
2232 | #[target_feature (enable = "avx512fp16" )] |
2233 | #[cfg_attr (test, assert_instr(vmulsh))] |
2234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2235 | pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { |
2236 | _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
2237 | } |
2238 | |
2239 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2240 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2241 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2242 | /// |
2243 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh) |
2244 | #[inline ] |
2245 | #[target_feature (enable = "avx512fp16" )] |
2246 | #[cfg_attr (test, assert_instr(vmulsh))] |
2247 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2248 | pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2249 | _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2250 | } |
2251 | |
2252 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the |
2253 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2254 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2255 | /// |
2256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh) |
2257 | #[inline ] |
2258 | #[target_feature (enable = "avx512fp16" )] |
2259 | #[cfg_attr (test, assert_instr(vmulsh))] |
2260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2261 | pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2262 | _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
2263 | } |
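
// Illustrative sketch (not one of the intrinsic definitions): how the scalar-lane
// multiply above behaves. It assumes a nightly toolchain with the `f16` and
// `stdarch_x86_avx512_f16` features enabled and an AVX512-FP16 capable CPU; the
// helper name `mul_sh_example` is made up for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn mul_sh_example() {
    let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0);
    // Lower lane: 1.0 * 3.0 = 3.0; the upper 7 lanes are copied from `a`.
    let r: [f16; 8] = unsafe { transmute(_mm_mul_sh(a, b)) };
    assert_eq!(r[0], 3.0);
    assert_eq!(r[7], 8.0);
    // With a zeromask whose bit 0 is clear, the lower lane is zeroed instead.
    let z: [f16; 8] = unsafe { transmute(_mm_maskz_mul_sh(0, a, b)) };
    assert_eq!(z[0], 0.0);
}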
2264 | |
2265 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2266 | /// |
2267 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph) |
2268 | #[inline ] |
2269 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2270 | #[cfg_attr (test, assert_instr(vdivph))] |
2271 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2272 | pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { |
    unsafe { simd_div(a, b) }
2274 | } |
2275 | |
2276 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2277 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2278 | /// |
2279 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph) |
2280 | #[inline ] |
2281 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2282 | #[cfg_attr (test, assert_instr(vdivph))] |
2283 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2284 | pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2285 | unsafe { |
        let r = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2288 | } |
2289 | } |
2290 | |
2291 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2292 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2293 | /// |
2294 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph) |
2295 | #[inline ] |
2296 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2297 | #[cfg_attr (test, assert_instr(vdivph))] |
2298 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2299 | pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2300 | unsafe { |
        let r = _mm_div_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
2303 | } |
2304 | } |
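
// Illustrative sketch of the per-lane mask behaviour for the packed divide above
// (hypothetical helper; same nightly `f16`/`stdarch_x86_avx512_f16` assumptions,
// plus an AVX512-FP16 + AVX512-VL capable CPU):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn div_ph_mask_example() {
    let a = _mm_set1_ph(8.0);
    let b = _mm_set1_ph(2.0);
    let src = _mm_set1_ph(-1.0);
    // Mask 0b0000_0101: lanes 0 and 2 take 8.0 / 2.0 = 4.0, the rest keep `src`.
    let r: [f16; 8] = unsafe { transmute(_mm_mask_div_ph(src, 0b0000_0101, a, b)) };
    assert_eq!(r[0], 4.0);
    assert_eq!(r[1], -1.0);
    assert_eq!(r[2], 4.0);
}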
2305 | |
2306 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2307 | /// |
2308 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph) |
2309 | #[inline ] |
2310 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2311 | #[cfg_attr (test, assert_instr(vdivph))] |
2312 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2313 | pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { |
    unsafe { simd_div(a, b) }
2315 | } |
2316 | |
2317 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2318 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2319 | /// |
2320 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph) |
2321 | #[inline ] |
2322 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2323 | #[cfg_attr (test, assert_instr(vdivph))] |
2324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2325 | pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2326 | unsafe { |
        let r = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2329 | } |
2330 | } |
2331 | |
2332 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2333 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2334 | /// |
2335 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph) |
2336 | #[inline ] |
2337 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2338 | #[cfg_attr (test, assert_instr(vdivph))] |
2339 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2340 | pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
2341 | unsafe { |
        let r = _mm256_div_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
2344 | } |
2345 | } |
2346 | |
2347 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2348 | /// |
2349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph) |
2350 | #[inline ] |
2351 | #[target_feature (enable = "avx512fp16" )] |
2352 | #[cfg_attr (test, assert_instr(vdivph))] |
2353 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2354 | pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { |
    unsafe { simd_div(a, b) }
2356 | } |
2357 | |
2358 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2359 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2360 | /// |
2361 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph) |
2362 | #[inline ] |
2363 | #[target_feature (enable = "avx512fp16" )] |
2364 | #[cfg_attr (test, assert_instr(vdivph))] |
2365 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2366 | pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2367 | unsafe { |
        let r = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, src)
2370 | } |
2371 | } |
2372 | |
2373 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2374 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2375 | /// |
2376 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph) |
2377 | #[inline ] |
2378 | #[target_feature (enable = "avx512fp16" )] |
2379 | #[cfg_attr (test, assert_instr(vdivph))] |
2380 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2381 | pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
2382 | unsafe { |
        let r = _mm512_div_ph(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2385 | } |
2386 | } |
2387 | |
2388 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. |
2389 | /// Rounding is done according to the rounding parameter, which can be one of: |
2390 | /// |
2391 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2392 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2393 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2394 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2395 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2396 | /// |
2397 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph) |
2398 | #[inline ] |
2399 | #[target_feature (enable = "avx512fp16" )] |
2400 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2401 | #[rustc_legacy_const_generics (2)] |
2402 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2403 | pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2404 | unsafe { |
2405 | static_assert_rounding!(ROUNDING); |
2406 | vdivph(a, b, ROUNDING) |
2407 | } |
2408 | } |
2409 | |
2410 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2411 | /// writemask k (elements are copied from src when the corresponding mask bit is not set). |
2412 | /// Rounding is done according to the rounding parameter, which can be one of: |
2413 | /// |
2414 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2415 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2416 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2417 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2418 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2419 | /// |
2420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph) |
2421 | #[inline ] |
2422 | #[target_feature (enable = "avx512fp16" )] |
2423 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2424 | #[rustc_legacy_const_generics (4)] |
2425 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2426 | pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>( |
2427 | src: __m512h, |
2428 | k: __mmask32, |
2429 | a: __m512h, |
2430 | b: __m512h, |
2431 | ) -> __m512h { |
2432 | unsafe { |
2433 | static_assert_rounding!(ROUNDING); |
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, src)
2436 | } |
2437 | } |
2438 | |
2439 | /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using |
2440 | /// zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
2441 | /// Rounding is done according to the rounding parameter, which can be one of: |
2442 | /// |
2443 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2444 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2445 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2446 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2447 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2448 | /// |
2449 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph) |
2450 | #[inline ] |
2451 | #[target_feature (enable = "avx512fp16" )] |
2452 | #[cfg_attr (test, assert_instr(vdivph, ROUNDING = 8))] |
2453 | #[rustc_legacy_const_generics (3)] |
2454 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2455 | pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>( |
2456 | k: __mmask32, |
2457 | a: __m512h, |
2458 | b: __m512h, |
2459 | ) -> __m512h { |
2460 | unsafe { |
2461 | static_assert_rounding!(ROUNDING); |
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
2464 | } |
2465 | } |
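
// Illustrative sketch of passing an explicit rounding mode to the 512-bit divide
// above (hypothetical helper; same nightly-feature assumptions as this module):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn div_round_down_example(a: __m512h, b: __m512h) -> __m512h {
    // Round toward negative infinity and suppress exceptions, rather than using
    // whatever MXCSR.RC currently selects.
    _mm512_div_round_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b)
}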
2466 | |
2467 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2468 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2469 | /// Rounding is done according to the rounding parameter, which can be one of: |
2470 | /// |
2471 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2472 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2473 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2474 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2475 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2476 | /// |
2477 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh) |
2478 | #[inline ] |
2479 | #[target_feature (enable = "avx512fp16" )] |
2480 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2481 | #[rustc_legacy_const_generics (2)] |
2482 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2483 | pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2484 | static_assert_rounding!(ROUNDING); |
    _mm_mask_div_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
2486 | } |
2487 | |
2488 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2489 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2490 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2491 | /// Rounding is done according to the rounding parameter, which can be one of: |
2492 | /// |
2493 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2494 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2495 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2496 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2497 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2498 | /// |
2499 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh) |
2500 | #[inline ] |
2501 | #[target_feature (enable = "avx512fp16" )] |
2502 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2503 | #[rustc_legacy_const_generics (4)] |
2504 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2505 | pub fn _mm_mask_div_round_sh<const ROUNDING: i32>( |
2506 | src: __m128h, |
2507 | k: __mmask8, |
2508 | a: __m128h, |
2509 | b: __m128h, |
2510 | ) -> __m128h { |
2511 | unsafe { |
2512 | static_assert_rounding!(ROUNDING); |
2513 | vdivsh(a, b, src, k, ROUNDING) |
2514 | } |
2515 | } |
2516 | |
2517 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2518 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2519 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2520 | /// Rounding is done according to the rounding parameter, which can be one of: |
2521 | /// |
2522 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2523 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2524 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2525 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2526 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2527 | /// |
2528 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh) |
2529 | #[inline ] |
2530 | #[target_feature (enable = "avx512fp16" )] |
2531 | #[cfg_attr (test, assert_instr(vdivsh, ROUNDING = 8))] |
2532 | #[rustc_legacy_const_generics (3)] |
2533 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2534 | pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2535 | static_assert_rounding!(ROUNDING); |
    _mm_mask_div_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
2537 | } |
2538 | |
2539 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2540 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
2541 | /// |
2542 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh) |
2543 | #[inline ] |
2544 | #[target_feature (enable = "avx512fp16" )] |
2545 | #[cfg_attr (test, assert_instr(vdivsh))] |
2546 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2547 | pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { |
2548 | _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) |
2549 | } |
2550 | |
2551 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2552 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2553 | /// writemask k (the element is copied from src when mask bit 0 is not set). |
2554 | /// |
2555 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh) |
2556 | #[inline ] |
2557 | #[target_feature (enable = "avx512fp16" )] |
2558 | #[cfg_attr (test, assert_instr(vdivsh))] |
2559 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2560 | pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2561 | _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2562 | } |
2563 | |
2564 | /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the |
2565 | /// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using |
2566 | /// zeromask k (the element is zeroed out when mask bit 0 is not set). |
2567 | /// |
2568 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh) |
2569 | #[inline ] |
2570 | #[target_feature (enable = "avx512fp16" )] |
2571 | #[cfg_attr (test, assert_instr(vdivsh))] |
2572 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2573 | pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2574 | _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) |
2575 | } |
2576 | |
2577 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2578 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2579 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2580 | /// |
2581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch) |
2582 | #[inline ] |
2583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2584 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2585 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2586 | pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
2588 | } |
2589 | |
2590 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2591 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2592 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2593 | /// |
2594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch) |
2595 | #[inline ] |
2596 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2597 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2598 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2599 | pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
2601 | } |
2602 | |
2603 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2604 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2605 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2606 | /// |
2607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch) |
2608 | #[inline ] |
2609 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2610 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2612 | pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
2614 | } |
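
// Illustrative sketch of the (real, imaginary) lane pairing used by the `*_pch`
// intrinsics above (hypothetical helper; same nightly-feature assumptions, plus
// an AVX512-FP16 + AVX512-VL capable CPU):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn mul_pch_example() {
    // Four complex numbers per __m128h; every pair here is 1 + 2i and 3 + 4i.
    let a = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
    let b = _mm_set_ph(4.0, 3.0, 4.0, 3.0, 4.0, 3.0, 4.0, 3.0);
    // (1 + 2i) * (3 + 4i) = -5 + 10i in every complex lane.
    let r: [f16; 8] = unsafe { transmute(_mm_mul_pch(a, b)) };
    assert_eq!(r[0], -5.0);
    assert_eq!(r[1], 10.0);
}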
2615 | |
2616 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2617 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2618 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2619 | /// |
2620 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch) |
2621 | #[inline ] |
2622 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2623 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2624 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2625 | pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
2627 | } |
2628 | |
2629 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2630 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2631 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2632 | /// |
2633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch) |
2634 | #[inline ] |
2635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2636 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2638 | pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
    unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
2640 | } |
2641 | |
2642 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2643 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2644 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2645 | /// |
2646 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch) |
2647 | #[inline ] |
2648 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2649 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2650 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2651 | pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
2653 | } |
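
// Illustrative sketch: for the complex intrinsics, each writemask bit governs one
// complex number (a pair of f16 lanes), not an individual lane (hypothetical
// helper; same nightly-feature assumptions, plus AVX512-FP16 + AVX512-VL):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn maskz_mul_pch_example() {
    let a = _mm256_set1_ph(1.0);
    let b = _mm256_set1_ph(2.0);
    // Only mask bit 0 is set: complex lane 0 (f16 lanes 0 and 1) is computed as
    // (1 + 1i) * (2 + 2i) = 0 + 4i, and the other 7 complex lanes are zeroed.
    let r: [f16; 16] = unsafe { transmute(_mm256_maskz_mul_pch(0b0000_0001, a, b)) };
    assert_eq!(r[1], 4.0);
    assert_eq!(r[2], 0.0);
}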
2654 | |
2655 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2656 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2657 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2658 | /// |
2659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch) |
2660 | #[inline ] |
2661 | #[target_feature (enable = "avx512fp16" )] |
2662 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2664 | pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
2666 | } |
2667 | |
2668 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2669 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2670 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2671 | /// |
2672 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch) |
2673 | #[inline ] |
2674 | #[target_feature (enable = "avx512fp16" )] |
2675 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2676 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2677 | pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
2678 | _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2679 | } |
2680 | |
2681 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2682 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2683 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2684 | /// |
2685 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch) |
2686 | #[inline ] |
2687 | #[target_feature (enable = "avx512fp16" )] |
2688 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2689 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2690 | pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
2692 | } |
2693 | |
2694 | /// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is |
2695 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2696 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2697 | /// |
2698 | /// Rounding is done according to the rounding parameter, which can be one of: |
2699 | /// |
2700 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2701 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2702 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2703 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2704 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2705 | /// |
2706 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch) |
2707 | #[inline ] |
2708 | #[target_feature (enable = "avx512fp16" )] |
2709 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2710 | #[rustc_legacy_const_generics (2)] |
2711 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2712 | pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
2713 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
2715 | } |
2716 | |
2717 | /// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2718 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2719 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2720 | /// |
2721 | /// Rounding is done according to the rounding parameter, which can be one of: |
2722 | /// |
2723 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2724 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2725 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2726 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2727 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2728 | /// |
2729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch) |
2730 | #[inline ] |
2731 | #[target_feature (enable = "avx512fp16" )] |
2732 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2733 | #[rustc_legacy_const_generics (4)] |
2734 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2735 | pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>( |
2736 | src: __m512h, |
2737 | k: __mmask16, |
2738 | a: __m512h, |
2739 | b: __m512h, |
2740 | ) -> __m512h { |
2741 | unsafe { |
2742 | static_assert_rounding!(ROUNDING); |
        transmute(vfmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2750 | } |
2751 | } |
2752 | |
2753 | /// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2754 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2755 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2756 | /// |
2757 | /// Rounding is done according to the rounding parameter, which can be one of: |
2758 | /// |
2759 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2760 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2761 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2762 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2763 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2764 | /// |
2765 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch) |
2766 | #[inline ] |
2767 | #[target_feature (enable = "avx512fp16" )] |
2768 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
2769 | #[rustc_legacy_const_generics (3)] |
2770 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2771 | pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>( |
2772 | k: __mmask16, |
2773 | a: __m512h, |
2774 | b: __m512h, |
2775 | ) -> __m512h { |
2776 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
2778 | } |
2779 | |
2780 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, |
2781 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
2782 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2783 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2784 | /// |
2785 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch) |
2786 | #[inline ] |
2787 | #[target_feature (enable = "avx512fp16" )] |
2788 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2789 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2790 | pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_sch(_mm_undefined_ph(), 0xff, a, b)
2792 | } |
2793 | |
2794 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2795 | /// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed |
2796 | /// elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
2797 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2798 | /// |
2799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch) |
2800 | #[inline ] |
2801 | #[target_feature (enable = "avx512fp16" )] |
2802 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2803 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2804 | pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2805 | _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
2806 | } |
2807 | |
2808 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2809 | /// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements |
2810 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2811 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2812 | /// |
2813 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch) |
2814 | #[inline ] |
2815 | #[target_feature (enable = "avx512fp16" )] |
2816 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
2817 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2818 | pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_mul_sch(_mm_setzero_ph(), k, a, b)
2820 | } |
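
// Illustrative sketch for the scalar complex multiply above: only the lowest
// complex pair is multiplied and the remaining 6 f16 lanes come from `a`
// (hypothetical helper; same nightly-feature assumptions as this module):
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn mul_sch_example() {
    let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
    // Lower pair: (1 + 2i) * (3 + 4i) = -5 + 10i; lanes 2..=7 are copied from `a`.
    let r: [f16; 8] = unsafe { transmute(_mm_mul_sch(a, b)) };
    assert_eq!(r[0], -5.0);
    assert_eq!(r[1], 10.0);
    assert_eq!(r[2], 3.0);
}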
2821 | |
2822 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst, |
2823 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
2824 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2825 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2826 | /// |
2827 | /// Rounding is done according to the rounding parameter, which can be one of: |
2828 | /// |
2829 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2830 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2831 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2832 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2833 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2834 | /// |
2835 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch) |
2836 | #[inline ] |
2837 | #[target_feature (enable = "avx512fp16" )] |
2838 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2839 | #[rustc_legacy_const_generics (2)] |
2840 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2841 | pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
2842 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
2844 | } |
2845 | |
2846 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2847 | /// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed |
2848 | /// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2849 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2850 | /// |
2851 | /// Rounding is done according to the rounding parameter, which can be one of: |
2852 | /// |
2853 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2854 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2855 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2856 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2857 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2858 | /// |
2859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch) |
2860 | #[inline ] |
2861 | #[target_feature (enable = "avx512fp16" )] |
2862 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2863 | #[rustc_legacy_const_generics (4)] |
2864 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2865 | pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>( |
2866 | src: __m128h, |
2867 | k: __mmask8, |
2868 | a: __m128h, |
2869 | b: __m128h, |
2870 | ) -> __m128h { |
2871 | unsafe { |
2872 | static_assert_rounding!(ROUNDING); |
        transmute(vfmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
2880 | } |
2881 | } |
2882 | |
2883 | /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using |
2884 | /// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements |
2885 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision |
2886 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2887 | /// |
2888 | /// Rounding is done according to the rounding parameter, which can be one of: |
2889 | /// |
2890 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
2891 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
2892 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
2893 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
2894 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
2895 | /// |
2896 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch) |
2897 | #[inline ] |
2898 | #[target_feature (enable = "avx512fp16" )] |
2899 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
2900 | #[rustc_legacy_const_generics (3)] |
2901 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2902 | pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>( |
2903 | k: __mmask8, |
2904 | a: __m128h, |
2905 | b: __m128h, |
2906 | ) -> __m128h { |
2907 | static_assert_rounding!(ROUNDING); |
    _mm_mask_mul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
2909 | } |
2910 | |
2911 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2912 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2913 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2914 | /// |
2915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch) |
2916 | #[inline ] |
2917 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2918 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2920 | pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { |
2921 | _mm_mul_pch(a, b) |
2922 | } |
2923 | |
2924 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2925 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent |
2926 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2927 | /// |
2928 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch) |
2929 | #[inline ] |
2930 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2931 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2932 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2933 | pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2934 | _mm_mask_mul_pch(src, k, a, b) |
2935 | } |
2936 | |
2937 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2938 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2939 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2940 | /// |
2941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch) |
2942 | #[inline ] |
2943 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2944 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2946 | pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
2947 | _mm_maskz_mul_pch(k, a, b) |
2948 | } |
2949 | |
2950 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is |
2951 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
2952 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2953 | /// |
2954 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch) |
2955 | #[inline ] |
2956 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2957 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2959 | pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { |
2960 | _mm256_mul_pch(a, b) |
2961 | } |
2962 | |
2963 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
2964 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2965 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2966 | /// |
2967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch) |
2968 | #[inline ] |
2969 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2970 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2972 | pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
2973 | _mm256_mask_mul_pch(src, k, a, b) |
2974 | } |
2975 | |
2976 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
2977 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
2978 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2979 | /// |
2980 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch) |
2981 | #[inline ] |
2982 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
2983 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2985 | pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
2986 | _mm256_maskz_mul_pch(k, a, b) |
2987 | } |
2988 | |
2989 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed |
2990 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
2991 | /// |
2992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch) |
2993 | #[inline ] |
2994 | #[target_feature (enable = "avx512fp16" )] |
2995 | #[cfg_attr (test, assert_instr(vfmulcph))] |
2996 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
2997 | pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { |
2998 | _mm512_mul_pch(a, b) |
2999 | } |
3000 | |
3001 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
3002 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3003 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3004 | /// |
3005 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch) |
3006 | #[inline ] |
3007 | #[target_feature (enable = "avx512fp16" )] |
3008 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3009 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3010 | pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3011 | _mm512_mask_mul_pch(src, k, a, b) |
3012 | } |
3013 | |
3014 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
3015 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3016 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3017 | /// |
3018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch) |
3019 | #[inline ] |
3020 | #[target_feature (enable = "avx512fp16" )] |
3021 | #[cfg_attr (test, assert_instr(vfmulcph))] |
3022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3023 | pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3024 | _mm512_maskz_mul_pch(k, a, b) |
3025 | } |
3026 | |
3027 | /// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed |
3028 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3029 | /// Rounding is done according to the rounding parameter, which can be one of: |
3030 | /// |
3031 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3032 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3033 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3034 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3035 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3036 | /// |
3037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch) |
3038 | #[inline ] |
3039 | #[target_feature (enable = "avx512fp16" )] |
3040 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3041 | #[rustc_legacy_const_generics (2)] |
3042 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3043 | pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3044 | static_assert_rounding!(ROUNDING); |
3045 | _mm512_mul_round_pch::<ROUNDING>(a, b) |
3046 | } |
3047 | |
3048 | /// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element |
3049 | /// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3050 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3051 | /// Rounding is done according to the rounding parameter, which can be one of: |
3052 | /// |
3053 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3054 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3055 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3056 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3057 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3058 | /// |
3059 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch) |
3060 | #[inline ] |
3061 | #[target_feature (enable = "avx512fp16" )] |
3062 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3063 | #[rustc_legacy_const_generics (4)] |
3064 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3065 | pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>( |
3066 | src: __m512h, |
3067 | k: __mmask16, |
3068 | a: __m512h, |
3069 | b: __m512h, |
3070 | ) -> __m512h { |
3071 | static_assert_rounding!(ROUNDING); |
3072 | _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b) |
3073 | } |
3074 | |
3075 | /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element |
3076 | /// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision |
3077 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3078 | /// Rounding is done according to the rounding parameter, which can be one of: |
3079 | /// |
3080 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3081 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3082 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3083 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3084 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3085 | /// |
3086 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch) |
3087 | #[inline ] |
3088 | #[target_feature (enable = "avx512fp16" )] |
3089 | #[cfg_attr (test, assert_instr(vfmulcph, ROUNDING = 8))] |
3090 | #[rustc_legacy_const_generics (3)] |
3091 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3092 | pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>( |
3093 | k: __mmask16, |
3094 | a: __m512h, |
3095 | b: __m512h, |
3096 | ) -> __m512h { |
3097 | static_assert_rounding!(ROUNDING); |
3098 | _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b) |
3099 | } |
3100 | |
3101 | /// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is |
3102 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
3103 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3104 | /// |
3105 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch) |
3106 | #[inline ] |
3107 | #[target_feature (enable = "avx512fp16" )] |
3108 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3109 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3110 | pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { |
3111 | _mm_mul_sch(a, b) |
3112 | } |
3113 | |
3114 | /// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element |
3115 | /// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3116 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3117 | /// |
3118 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch) |
3119 | #[inline ] |
3120 | #[target_feature (enable = "avx512fp16" )] |
3121 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3122 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3123 | pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3124 | _mm_mask_mul_sch(src, k, a, b) |
3125 | } |
3126 | |
3127 | /// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element |
3128 | /// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3129 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3130 | /// |
3131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch) |
3132 | #[inline ] |
3133 | #[target_feature (enable = "avx512fp16" )] |
3134 | #[cfg_attr (test, assert_instr(vfmulcsh))] |
3135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3136 | pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3137 | _mm_maskz_mul_sch(k, a, b) |
3138 | } |
3139 | |
3140 | /// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed |
3141 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3142 | /// |
3143 | /// Rounding is done according to the rounding parameter, which can be one of: |
3144 | /// |
3145 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3146 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3147 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3148 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3149 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3150 | /// |
3151 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch) |
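///
/// A minimal sketch of supplying the rounding mode as the const generic, given `__m128h`
/// vectors `a` and `b` (assuming the `avx512fp16` target feature is enabled):
///
/// ```ignore
/// // Round to nearest and suppress exceptions while multiplying the lower complex numbers.
/// let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```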
3152 | #[inline ] |
3153 | #[target_feature (enable = "avx512fp16" )] |
3154 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3155 | #[rustc_legacy_const_generics (2)] |
3156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3157 | pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3158 | static_assert_rounding!(ROUNDING); |
3159 | _mm_mul_round_sch::<ROUNDING>(a, b) |
3160 | } |
3161 | |
3162 | /// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element |
3163 | /// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3164 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3165 | /// |
3166 | /// Rounding is done according to the rounding parameter, which can be one of: |
3167 | /// |
3168 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3169 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3170 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3171 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3172 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3173 | /// |
3174 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch) |
3175 | #[inline ] |
3176 | #[target_feature (enable = "avx512fp16" )] |
3177 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3178 | #[rustc_legacy_const_generics (4)] |
3179 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3180 | pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>( |
3181 | src: __m128h, |
3182 | k: __mmask8, |
3183 | a: __m128h, |
3184 | b: __m128h, |
3185 | ) -> __m128h { |
3186 | static_assert_rounding!(ROUNDING); |
3187 | _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b) |
3188 | } |
3189 | |
3190 | /// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element |
3191 | /// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision |
3192 | /// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
3193 | /// |
3194 | /// Rounding is done according to the rounding parameter, which can be one of: |
3195 | /// |
3196 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3197 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3198 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3199 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3200 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3201 | /// |
3202 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch) |
3203 | #[inline ] |
3204 | #[target_feature (enable = "avx512fp16" )] |
3205 | #[cfg_attr (test, assert_instr(vfmulcsh, ROUNDING = 8))] |
3206 | #[rustc_legacy_const_generics (3)] |
3207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3208 | pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>( |
3209 | k: __mmask8, |
3210 | a: __m128h, |
3211 | b: __m128h, |
3212 | ) -> __m128h { |
3213 | static_assert_rounding!(ROUNDING); |
3214 | _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b) |
3215 | } |
3216 | |
3217 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3218 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3219 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3220 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3221 | /// |
3222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch) |
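///
/// A minimal usage sketch showing one conjugate product (assuming the `avx512fp16`
/// and `avx512vl` target features are enabled):
///
/// ```ignore
/// // First complex pair: a = 1 + 2i, b = 3 + 4i (remaining pairs are zero).
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
/// let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0);
/// // a * conj(b) = (1 + 2i) * (3 - 4i) = 11 + 2i, so dst.fp16[0] == 11.0 and dst.fp16[1] == 2.0
/// let r = _mm_cmul_pch(a, b);
/// ```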
3223 | #[inline ] |
3224 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3225 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3226 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3227 | pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
3229 | } |
3230 | |
3231 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3232 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3233 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3234 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3235 | /// |
3236 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch) |
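///
/// A minimal sketch of the writemask behaviour, given `__m128h` vectors `src`, `a` and `b`
/// (assuming the `avx512fp16` and `avx512vl` target features are enabled):
///
/// ```ignore
/// // Only mask bit 0 is set, so only the first complex product is computed;
/// // the other three complex elements of dst are copied from `src`.
/// let r = _mm_mask_cmul_pch(src, 0b0001, a, b);
/// ```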
3237 | #[inline ] |
3238 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3239 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3241 | pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
3243 | } |
3244 | |
3245 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3246 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3247 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3248 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3249 | /// |
3250 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch) |
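///
/// A minimal sketch of the zeromask behaviour, given `__m128h` vectors `a` and `b`
/// (assuming the `avx512fp16` and `avx512vl` target features are enabled):
///
/// ```ignore
/// // Only mask bit 0 is set, so only the first complex product is kept;
/// // the other three complex elements of dst are zeroed.
/// let r = _mm_maskz_cmul_pch(0b0001, a, b);
/// ```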
3251 | #[inline ] |
3252 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3253 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3254 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3255 | pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
3257 | } |
3258 | |
3259 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3260 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3261 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3262 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3263 | /// |
3264 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch) |
3265 | #[inline ] |
3266 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3267 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3268 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3269 | pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { |
_mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
3271 | } |
3272 | |
3273 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3274 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3275 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3276 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3277 | /// |
3278 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch) |
3279 | #[inline ] |
3280 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3281 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3282 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3283 | pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
3285 | } |
3286 | |
3287 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3288 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3289 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3290 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3291 | /// |
3292 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch) |
3293 | #[inline ] |
3294 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3295 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3296 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3297 | pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
_mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
3299 | } |
3300 | |
3301 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3302 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3303 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3304 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3305 | /// |
3306 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch) |
3307 | #[inline ] |
3308 | #[target_feature (enable = "avx512fp16" )] |
3309 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3310 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3311 | pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { |
_mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
3313 | } |
3314 | |
3315 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3316 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3317 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3318 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3319 | /// |
3320 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch) |
3321 | #[inline ] |
3322 | #[target_feature (enable = "avx512fp16" )] |
3323 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3325 | pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3326 | _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
3327 | } |
3328 | |
3329 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3330 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3331 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3332 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3333 | /// |
3334 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch) |
3335 | #[inline ] |
3336 | #[target_feature (enable = "avx512fp16" )] |
3337 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3338 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3339 | pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
_mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
3341 | } |
3342 | |
3343 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3344 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3345 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3346 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3347 | /// |
3348 | /// Rounding is done according to the rounding parameter, which can be one of: |
3349 | /// |
3350 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3351 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3352 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3353 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3354 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3355 | /// |
3356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch) |
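///
/// A minimal sketch of choosing the rounding mode at compile time, given `__m512h`
/// vectors `a` and `b` (assuming the `avx512fp16` target feature is enabled):
///
/// ```ignore
/// // Truncate toward zero and suppress exceptions for all 16 conjugate products.
/// let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// ```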
3357 | #[inline ] |
3358 | #[target_feature (enable = "avx512fp16" )] |
3359 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3360 | #[rustc_legacy_const_generics (2)] |
3361 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3362 | pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3363 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
3365 | } |
3366 | |
3367 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3368 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3369 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3370 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3371 | /// |
3372 | /// Rounding is done according to the rounding parameter, which can be one of: |
3373 | /// |
3374 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3375 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3376 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3377 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3378 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3379 | /// |
3380 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch) |
3381 | #[inline ] |
3382 | #[target_feature (enable = "avx512fp16" )] |
3383 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3384 | #[rustc_legacy_const_generics (4)] |
3385 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3386 | pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>( |
3387 | src: __m512h, |
3388 | k: __mmask16, |
3389 | a: __m512h, |
3390 | b: __m512h, |
3391 | ) -> __m512h { |
3392 | unsafe { |
3393 | static_assert_rounding!(ROUNDING); |
transmute(vfcmulcph_512(
transmute(a),
transmute(b),
transmute(src),
k,
ROUNDING,
))
3401 | } |
3402 | } |
3403 | |
3404 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3405 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3406 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3407 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3408 | /// |
3409 | /// Rounding is done according to the rounding parameter, which can be one of: |
3410 | /// |
3411 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3412 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3413 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3414 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3415 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3416 | /// |
3417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch) |
3418 | #[inline ] |
3419 | #[target_feature (enable = "avx512fp16" )] |
3420 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3421 | #[rustc_legacy_const_generics (3)] |
3422 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3423 | pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>( |
3424 | k: __mmask16, |
3425 | a: __m512h, |
3426 | b: __m512h, |
3427 | ) -> __m512h { |
3428 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
3430 | } |
3431 | |
3432 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3433 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3435 | /// |
3436 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch) |
3437 | #[inline ] |
3438 | #[target_feature (enable = "avx512fp16" )] |
3439 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3440 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3441 | pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_sch(_mm_undefined_ph(), 0xff, a, b)
3443 | } |
3444 | |
3445 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3446 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3447 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3449 | /// |
3450 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch) |
3451 | #[inline ] |
3452 | #[target_feature (enable = "avx512fp16" )] |
3453 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3454 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3455 | pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3456 | _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
3457 | } |
3458 | |
3459 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3460 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3461 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3463 | /// |
3464 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch) |
3465 | #[inline ] |
3466 | #[target_feature (enable = "avx512fp16" )] |
3467 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3468 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3469 | pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_cmul_sch(_mm_setzero_ph(), k, a, b)
3471 | } |
3472 | |
3473 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3474 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3476 | /// |
3477 | /// Rounding is done according to the rounding parameter, which can be one of: |
3478 | /// |
3479 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3480 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3481 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3482 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3483 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3484 | /// |
3485 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch) |
3486 | #[inline ] |
3487 | #[target_feature (enable = "avx512fp16" )] |
3488 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3489 | #[rustc_legacy_const_generics (2)] |
3490 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3491 | pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3492 | static_assert_rounding!(ROUNDING); |
_mm_mask_cmul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
3494 | } |
3495 | |
3496 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3497 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3498 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3499 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3500 | /// |
3501 | /// Rounding is done according to the rounding parameter, which can be one of: |
3502 | /// |
3503 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3504 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3505 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3506 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3507 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3508 | /// |
3509 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch) |
3510 | #[inline ] |
3511 | #[target_feature (enable = "avx512fp16" )] |
3512 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3513 | #[rustc_legacy_const_generics (4)] |
3514 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3515 | pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>( |
3516 | src: __m128h, |
3517 | k: __mmask8, |
3518 | a: __m128h, |
3519 | b: __m128h, |
3520 | ) -> __m128h { |
3521 | unsafe { |
3522 | static_assert_rounding!(ROUNDING); |
transmute(vfcmulcsh(
transmute(a),
transmute(b),
transmute(src),
k,
ROUNDING,
))
3530 | } |
3531 | } |
3532 | |
3533 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3534 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3535 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3536 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3537 | /// |
3538 | /// Rounding is done according to the rounding parameter, which can be one of: |
3539 | /// |
3540 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3541 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3542 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3543 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3544 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3545 | /// |
3546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch) |
3547 | #[inline ] |
3548 | #[target_feature (enable = "avx512fp16" )] |
3549 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3550 | #[rustc_legacy_const_generics (3)] |
3551 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3552 | pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>( |
3553 | k: __mmask8, |
3554 | a: __m128h, |
3555 | b: __m128h, |
3556 | ) -> __m128h { |
3557 | static_assert_rounding!(ROUNDING); |
_mm_mask_cmul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
3559 | } |
3560 | |
3561 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3562 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3563 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3564 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3565 | /// |
3566 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch) |
3567 | #[inline ] |
3568 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3569 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3570 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3571 | pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { |
3572 | _mm_cmul_pch(a, b) |
3573 | } |
3574 | |
3575 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3576 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3577 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3578 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3579 | /// |
3580 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch) |
3581 | #[inline ] |
3582 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3583 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3584 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3585 | pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3586 | _mm_mask_cmul_pch(src, k, a, b) |
3587 | } |
3588 | |
3589 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3590 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3591 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3592 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3593 | /// |
3594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch) |
3595 | #[inline ] |
3596 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3597 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3598 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3599 | pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3600 | _mm_maskz_cmul_pch(k, a, b) |
3601 | } |
3602 | |
3603 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3604 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3605 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3606 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3607 | /// |
3608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch) |
3609 | #[inline ] |
3610 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3611 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3612 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3613 | pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { |
3614 | _mm256_cmul_pch(a, b) |
3615 | } |
3616 | |
3617 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3618 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3619 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3620 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3621 | /// |
3622 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch) |
3623 | #[inline ] |
3624 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3625 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3626 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3627 | pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
3628 | _mm256_mask_cmul_pch(src, k, a, b) |
3629 | } |
3630 | |
3631 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3632 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3633 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3634 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3635 | /// |
3636 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch) |
3637 | #[inline ] |
3638 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3639 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3640 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3641 | pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { |
3642 | _mm256_maskz_cmul_pch(k, a, b) |
3643 | } |
3644 | |
3645 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3646 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3647 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3648 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3649 | /// |
3650 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch) |
3651 | #[inline ] |
3652 | #[target_feature (enable = "avx512fp16" )] |
3653 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3654 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3655 | pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { |
3656 | _mm512_cmul_pch(a, b) |
3657 | } |
3658 | |
3659 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3660 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3661 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3662 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3663 | /// |
3664 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch) |
3665 | #[inline ] |
3666 | #[target_feature (enable = "avx512fp16" )] |
3667 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3668 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3669 | pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3670 | _mm512_mask_cmul_pch(src, k, a, b) |
3671 | } |
3672 | |
3673 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3674 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3675 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3676 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3677 | /// |
3678 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch) |
3679 | #[inline ] |
3680 | #[target_feature (enable = "avx512fp16" )] |
3681 | #[cfg_attr (test, assert_instr(vfcmulcph))] |
3682 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3683 | pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { |
3684 | _mm512_maskz_cmul_pch(k, a, b) |
3685 | } |
3686 | |
3687 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3688 | /// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3690 | /// |
3691 | /// Rounding is done according to the rounding parameter, which can be one of: |
3692 | /// |
3693 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3694 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3695 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3696 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3697 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3698 | /// |
3699 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch) |
3700 | #[inline ] |
3701 | #[target_feature (enable = "avx512fp16" )] |
3702 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3703 | #[rustc_legacy_const_generics (2)] |
3704 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3705 | pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
3706 | static_assert_rounding!(ROUNDING); |
3707 | _mm512_cmul_round_pch::<ROUNDING>(a, b) |
3708 | } |
3709 | |
3710 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3711 | /// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set). |
3712 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3713 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3714 | /// |
3715 | /// Rounding is done according to the rounding parameter, which can be one of: |
3716 | /// |
3717 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3718 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3719 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3720 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3721 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3722 | /// |
3723 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch) |
3724 | #[inline ] |
3725 | #[target_feature (enable = "avx512fp16" )] |
3726 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3727 | #[rustc_legacy_const_generics (4)] |
3728 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3729 | pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>( |
3730 | src: __m512h, |
3731 | k: __mmask16, |
3732 | a: __m512h, |
3733 | b: __m512h, |
3734 | ) -> __m512h { |
3735 | static_assert_rounding!(ROUNDING); |
3736 | _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b) |
3737 | } |
3738 | |
3739 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and |
3740 | /// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set). |
3741 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3742 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3743 | /// |
3744 | /// Rounding is done according to the rounding parameter, which can be one of: |
3745 | /// |
3746 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3747 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3748 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3749 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3750 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3751 | /// |
3752 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch) |
3753 | #[inline ] |
3754 | #[target_feature (enable = "avx512fp16" )] |
3755 | #[cfg_attr (test, assert_instr(vfcmulcph, ROUNDING = 8))] |
3756 | #[rustc_legacy_const_generics (3)] |
3757 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3758 | pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>( |
3759 | k: __mmask16, |
3760 | a: __m512h, |
3761 | b: __m512h, |
3762 | ) -> __m512h { |
3763 | static_assert_rounding!(ROUNDING); |
3764 | _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b) |
3765 | } |
3766 | |
3767 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3768 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
3769 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3770 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3771 | /// |
3772 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch) |
3773 | #[inline ] |
3774 | #[target_feature (enable = "avx512fp16" )] |
3775 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3777 | pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { |
3778 | _mm_cmul_sch(a, b) |
3779 | } |
3780 | |
3781 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3782 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3783 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3784 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3785 | /// |
3786 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch) |
3787 | #[inline ] |
3788 | #[target_feature (enable = "avx512fp16" )] |
3789 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3790 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3791 | pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3792 | _mm_mask_cmul_sch(src, k, a, b) |
3793 | } |
3794 | |
3795 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3796 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3797 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3798 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3799 | /// |
3800 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch) |
3801 | #[inline ] |
3802 | #[target_feature (enable = "avx512fp16" )] |
3803 | #[cfg_attr (test, assert_instr(vfcmulcsh))] |
3804 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3805 | pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
3806 | _mm_maskz_cmul_sch(k, a, b) |
3807 | } |
3808 | |
3809 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3810 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3812 | /// |
3813 | /// Rounding is done according to the rounding parameter, which can be one of: |
3814 | /// |
3815 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3816 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3817 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3818 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3819 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3820 | /// |
3821 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch) |
3822 | #[inline ] |
3823 | #[target_feature (enable = "avx512fp16" )] |
3824 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3825 | #[rustc_legacy_const_generics (2)] |
3826 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3827 | pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
3828 | static_assert_rounding!(ROUNDING); |
3829 | _mm_cmul_round_sch::<ROUNDING>(a, b) |
3830 | } |
3831 | |
3832 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3833 | /// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set). |
3834 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3835 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3836 | /// |
3837 | /// Rounding is done according to the rounding parameter, which can be one of: |
3838 | /// |
3839 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3840 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3841 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3842 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3843 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3844 | /// |
3845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch) |
3846 | #[inline ] |
3847 | #[target_feature (enable = "avx512fp16" )] |
3848 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3849 | #[rustc_legacy_const_generics (4)] |
3850 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3851 | pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>( |
3852 | src: __m128h, |
3853 | k: __mmask8, |
3854 | a: __m128h, |
3855 | b: __m128h, |
3856 | ) -> __m128h { |
3857 | static_assert_rounding!(ROUNDING); |
3858 | _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b) |
3859 | } |
3860 | |
3861 | /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, |
3862 | /// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set). |
3863 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
3864 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3865 | /// |
3866 | /// Rounding is done according to the rounding parameter, which can be one of: |
3867 | /// |
3868 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
3869 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
3870 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
3871 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
3872 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
3873 | /// |
3874 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch) |
3875 | #[inline ] |
3876 | #[target_feature (enable = "avx512fp16" )] |
3877 | #[cfg_attr (test, assert_instr(vfcmulcsh, ROUNDING = 8))] |
3878 | #[rustc_legacy_const_generics (3)] |
3879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3880 | pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>( |
3881 | k: __mmask8, |
3882 | a: __m128h, |
3883 | b: __m128h, |
3884 | ) -> __m128h { |
3885 | static_assert_rounding!(ROUNDING); |
3886 | _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b) |
3887 | } |
3888 | |
3889 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3890 | /// the results in dst. |
3891 | /// |
3892 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph) |
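///
/// A minimal usage sketch (assuming the `avx512fp16` and `avx512vl` target features
/// are enabled); the absolute value is obtained by clearing the sign bit of each element:
///
/// ```ignore
/// let v = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, -1.5);
/// // r.fp16[0] == 1.5 and r.fp16[1] == 2.5
/// let r = _mm_abs_ph(v);
/// ```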
3893 | #[inline ] |
3894 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3895 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3896 | pub fn _mm_abs_ph(v2: __m128h) -> __m128h { |
unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) }
3898 | } |
3899 | |
3900 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3901 | /// the result in dst. |
3902 | /// |
3903 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph) |
3904 | #[inline ] |
3905 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3907 | pub fn _mm256_abs_ph(v2: __m256h) -> __m256h { |
unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) }
3909 | } |
3910 | |
3911 | /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing |
3912 | /// the result in dst. |
3913 | /// |
3914 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph) |
3915 | #[inline ] |
3916 | #[target_feature (enable = "avx512fp16" )] |
3917 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3918 | pub fn _mm512_abs_ph(v2: __m512h) -> __m512h { |
unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) }
3920 | } |
3921 | |
3922 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex |
3923 | /// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines |
3924 | /// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate |
3925 | /// `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3926 | /// |
3927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch) |
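///
/// A minimal usage sketch (assuming the `avx512fp16` and `avx512vl` target features
/// are enabled); the conjugate is formed by flipping the sign bit of each imaginary element:
///
/// ```ignore
/// // First complex pair: 1 + 2i (remaining pairs are zero).
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0);
/// // conj(1 + 2i) = 1 - 2i, so r.fp16[0] == 1.0 and r.fp16[1] == -2.0
/// let r = _mm_conj_pch(a);
/// ```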
3928 | #[inline ] |
3929 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3930 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3931 | pub fn _mm_conj_pch(a: __m128h) -> __m128h { |
unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) }
3933 | } |
3934 | |
3935 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
3936 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
3937 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
3938 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3939 | /// |
3940 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch) |
3941 | #[inline ] |
3942 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3943 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3944 | pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
3945 | unsafe { |
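// The writemask carries one bit per complex number, so the select below operates on
// 32-bit (f32-sized) lanes, each holding one real/imaginary pair.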
let r: __m128 = transmute(_mm_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
3948 | } |
3949 | } |
3950 | |
3951 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
3952 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
3953 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3954 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3955 | /// |
3956 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch) |
3957 | #[inline ] |
3958 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3959 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3960 | pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_conj_pch(_mm_setzero_ph(), k, a)
3962 | } |
3963 | |
3964 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number |
3965 | /// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
3966 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3967 | /// |
3968 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch) |
3969 | #[inline ] |
3970 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3972 | pub fn _mm256_conj_pch(a: __m256h) -> __m256h { |
unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) }
3974 | } |
3975 | |
3976 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
3977 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
3978 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3979 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3980 | /// |
3981 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch) |
3982 | #[inline ] |
3983 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
3984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
3985 | pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h { |
3986 | unsafe { |
let r: __m256 = transmute(_mm256_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
3989 | } |
3990 | } |
3991 | |
3992 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
3993 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
3994 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
3995 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
3996 | /// |
3997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch) |
3998 | #[inline ] |
3999 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4000 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4001 | pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { |
_mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
4003 | } |
4004 | |
4005 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number |
4006 | /// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4007 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4008 | /// |
4009 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch) |
4010 | #[inline ] |
4011 | #[target_feature (enable = "avx512fp16" )] |
4012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4013 | pub fn _mm512_conj_pch(a: __m512h) -> __m512h { |
unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) }
4015 | } |
4016 | |
4017 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k |
4018 | /// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two |
4019 | /// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4020 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4021 | /// |
4022 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch) |
4023 | #[inline ] |
4024 | #[target_feature (enable = "avx512fp16" )] |
4025 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4026 | pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h { |
4027 | unsafe { |
let r: __m512 = transmute(_mm512_conj_pch(a));
transmute(simd_select_bitmask(k, r, transmute(src)))
4030 | } |
4031 | } |
4032 | |
4033 | /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k |
4034 | /// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent |
4035 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4036 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4037 | /// |
4038 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch) |
4039 | #[inline ] |
4040 | #[target_feature (enable = "avx512fp16" )] |
4041 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4042 | pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { |
_mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
4044 | } |
4045 | |
4046 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4047 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4048 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4049 | /// |
4050 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch) |
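///
/// # Examples
///
/// A minimal usage sketch (illustrative only, not part of the original documentation;
/// it assumes the `avx512fp16` and `avx512vl` target features are available):
///
/// ```ignore
/// // a holds the complex numbers (1 + 2i), (3 + 4i), (5 + 6i), (7 + 8i);
/// // b holds (2 + 0i) in every slot and c is all zeros, so the result is
/// // (2 + 4i), (6 + 8i), (10 + 12i), (14 + 16i).
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
/// let c = _mm_setzero_ph();
/// let r = _mm_fmadd_pch(a, b, c);
/// ```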
4051 | #[inline ] |
4052 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4053 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4054 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4055 | pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
_mm_mask3_fmadd_pch(a, b, c, 0xff)
4057 | } |
4058 | |
4059 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4060 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding |
4061 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4062 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4063 | /// |
4064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch) |
4065 | #[inline ] |
4066 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4067 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4069 | pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4070 | unsafe { |
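// Each complex number occupies one 32-bit lane, so the vectors are reinterpreted as
// `__m128` and each bit of `k` selects a whole real/imaginary pair. The extra blend
// implements the copy-from-`a` writemask on top of the merge-from-`c` intrinsic below.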
let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4073 | } |
4074 | } |
4075 | |
4076 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4077 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4078 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4079 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4080 | /// |
4081 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch) |
4082 | #[inline ] |
4083 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4084 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4085 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4086 | pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4087 | unsafe { |
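// The masked intrinsic itself implements the mask3 behaviour: complex numbers whose
// mask bit is clear are taken from `c`, so no additional blend is needed here.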
transmute(vfmaddcph_mask3_128(
transmute(a),
transmute(b),
transmute(c),
4092 | k, |
4093 | )) |
4094 | } |
4095 | } |
4096 | |
4097 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4098 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4099 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4100 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4101 | /// |
4102 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch) |
4103 | #[inline ] |
4104 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4105 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4106 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4107 | pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4108 | unsafe { |
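// The zeroing form of the intrinsic clears complex numbers whose mask bit is not set,
// so the zeromask semantics need no further post-processing.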
transmute(vfmaddcph_maskz_128(
transmute(a),
transmute(b),
transmute(c),
4113 | k, |
4114 | )) |
4115 | } |
4116 | } |
4117 | |
4118 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4119 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4120 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4121 | /// |
4122 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch) |
4123 | #[inline ] |
4124 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4125 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4127 | pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
_mm256_mask3_fmadd_pch(a, b, c, 0xff)
4129 | } |
4130 | |
4131 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4132 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4133 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4134 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4135 | /// |
4136 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch) |
4137 | #[inline ] |
4138 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4139 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4140 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4141 | pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { |
4142 | unsafe { |
let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4145 | } |
4146 | } |
4147 | |
4148 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4149 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4150 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4151 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4152 | /// |
4153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch) |
4154 | #[inline ] |
4155 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4156 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4157 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4158 | pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { |
4159 | unsafe { |
transmute(vfmaddcph_mask3_256(
transmute(a),
transmute(b),
transmute(c),
4164 | k, |
4165 | )) |
4166 | } |
4167 | } |
4168 | |
4169 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4170 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4171 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4172 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4173 | /// |
4174 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch) |
4175 | #[inline ] |
4176 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4177 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4178 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4179 | pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
4180 | unsafe { |
transmute(vfmaddcph_maskz_256(
transmute(a),
transmute(b),
transmute(c),
4185 | k, |
4186 | )) |
4187 | } |
4188 | } |
4189 | |
4190 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4191 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4192 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4193 | /// |
4194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch) |
4195 | #[inline ] |
4196 | #[target_feature (enable = "avx512fp16" )] |
4197 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4199 | pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4200 | _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4201 | } |
4202 | |
4203 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4204 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4205 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4206 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4207 | /// |
4208 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch) |
4209 | #[inline ] |
4210 | #[target_feature (enable = "avx512fp16" )] |
4211 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4212 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4213 | pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { |
4214 | _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4215 | } |
4216 | |
4217 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4218 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4219 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4220 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4221 | /// |
4222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch) |
4223 | #[inline ] |
4224 | #[target_feature (enable = "avx512fp16" )] |
4225 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4226 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4227 | pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { |
4228 | _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4229 | } |
4230 | |
4231 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4232 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4233 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4234 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4235 | /// |
4236 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch) |
4237 | #[inline ] |
4238 | #[target_feature (enable = "avx512fp16" )] |
4239 | #[cfg_attr (test, assert_instr(vfmaddcph))] |
4240 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4241 | pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4242 | _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4243 | } |
4244 | |
4245 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4246 | /// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4247 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4248 | /// |
4249 | /// Rounding is done according to the rounding parameter, which can be one of: |
4250 | /// |
4251 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4252 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4253 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4254 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4255 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4256 | /// |
4257 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch) |
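///
/// # Examples
///
/// A minimal sketch of passing an explicit rounding mode through the const parameter
/// (illustrative only; `a`, `b` and `c` stand for existing `__m512h` values):
///
/// ```ignore
/// // Round to nearest and suppress exceptions.
/// let r = _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```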
4258 | #[inline ] |
4259 | #[target_feature (enable = "avx512fp16" )] |
4260 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4261 | #[rustc_legacy_const_generics (3)] |
4262 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4263 | pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4264 | static_assert_rounding!(ROUNDING); |
_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4266 | } |
4267 | |
4268 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4269 | /// and store the results in dst using writemask k (the element is copied from a when the corresponding mask |
4270 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4271 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4272 | /// |
4273 | /// Rounding is done according to the rounding parameter, which can be one of: |
4274 | /// |
4275 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4276 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4277 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4278 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4279 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4280 | /// |
4281 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch) |
4282 | #[inline ] |
4283 | #[target_feature (enable = "avx512fp16" )] |
4284 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4285 | #[rustc_legacy_const_generics (4)] |
4286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4287 | pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>( |
4288 | a: __m512h, |
4289 | k: __mmask16, |
4290 | b: __m512h, |
4291 | c: __m512h, |
4292 | ) -> __m512h { |
4293 | unsafe { |
4294 | static_assert_rounding!(ROUNDING); |
let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4297 | } |
4298 | } |
4299 | |
4300 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4301 | /// and store the results in dst using writemask k (the element is copied from c when the corresponding |
4302 | /// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) |
4303 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4304 | /// |
4305 | /// Rounding is done according to the rounding parameter, which can be one of: |
4306 | /// |
4307 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4308 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4309 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4310 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4311 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4312 | /// |
4313 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch) |
4314 | #[inline ] |
4315 | #[target_feature (enable = "avx512fp16" )] |
4316 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4317 | #[rustc_legacy_const_generics (4)] |
4318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4319 | pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>( |
4320 | a: __m512h, |
4321 | b: __m512h, |
4322 | c: __m512h, |
4323 | k: __mmask16, |
4324 | ) -> __m512h { |
4325 | unsafe { |
4326 | static_assert_rounding!(ROUNDING); |
transmute(vfmaddcph_mask3_512(
transmute(a),
transmute(b),
transmute(c),
4331 | k, |
4332 | ROUNDING, |
4333 | )) |
4334 | } |
4335 | } |
4336 | |
4337 | /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, |
4338 | /// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask |
4339 | /// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point |
4340 | /// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4341 | /// |
4342 | /// Rounding is done according to the rounding parameter, which can be one of: |
4343 | /// |
4344 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4345 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4346 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4347 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4348 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4349 | /// |
4350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch) |
4351 | #[inline ] |
4352 | #[target_feature (enable = "avx512fp16" )] |
4353 | #[cfg_attr (test, assert_instr(vfmaddcph, ROUNDING = 8))] |
4354 | #[rustc_legacy_const_generics (4)] |
4355 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4356 | pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>( |
4357 | k: __mmask16, |
4358 | a: __m512h, |
4359 | b: __m512h, |
4360 | c: __m512h, |
4361 | ) -> __m512h { |
4362 | unsafe { |
4363 | static_assert_rounding!(ROUNDING); |
transmute(vfmaddcph_maskz_512(
transmute(a),
transmute(b),
transmute(c),
4368 | k, |
4369 | ROUNDING, |
4370 | )) |
4371 | } |
4372 | } |
4373 | |
4374 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4375 | /// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the |
4376 | /// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) |
4377 | /// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4378 | /// |
4379 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch) |
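///
/// # Examples
///
/// A minimal sketch (illustrative only; assumes `avx512fp16` is available). Only the
/// lowest complex number is computed; the upper six elements are copied from `a`:
///
/// ```ignore
/// let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 2.0, 1.0); // lower complex number: 1 + 2i
/// let b = _mm_set_sh(3.0); // lower complex number: 3 + 0i
/// let c = _mm_setzero_ph();
/// // The lower complex number of r is (1 + 2i) * 3 = 3 + 6i.
/// let r = _mm_fmadd_sch(a, b, c);
/// ```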
4380 | #[inline ] |
4381 | #[target_feature (enable = "avx512fp16" )] |
4382 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4383 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4384 | pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4385 | _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4386 | } |
4387 | |
4388 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4389 | /// store the result in the lower elements of dst using writemask k (elements are copied from a when |
4390 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4391 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4392 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4393 | /// |
4394 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch) |
4395 | #[inline ] |
4396 | #[target_feature (enable = "avx512fp16" )] |
4397 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4398 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4399 | pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4400 | _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4401 | } |
4402 | |
4403 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4404 | /// store the result in the lower elements of dst using writemask k (elements are copied from c when |
4405 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4406 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4407 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4408 | /// |
4409 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch) |
4410 | #[inline ] |
4411 | #[target_feature (enable = "avx512fp16" )] |
4412 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4413 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4414 | pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4415 | _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4416 | } |
4417 | |
4418 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4419 | /// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask |
4420 | /// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each |
4421 | /// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
4422 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4423 | /// |
4424 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch) |
4425 | #[inline ] |
4426 | #[target_feature (enable = "avx512fp16" )] |
4427 | #[cfg_attr (test, assert_instr(vfmaddcsh))] |
4428 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4429 | pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4430 | _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4431 | } |
4432 | |
4433 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4434 | /// store the result in the lower elements of dst. Each complex number is composed of two adjacent |
4435 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4436 | /// |
4437 | /// Rounding is done according to the rounding parameter, which can be one of: |
4438 | /// |
4439 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4440 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4441 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4442 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4443 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4444 | /// |
4445 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch) |
4446 | #[inline ] |
4447 | #[target_feature (enable = "avx512fp16" )] |
4448 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4449 | #[rustc_legacy_const_generics (3)] |
4450 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4451 | pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4452 | unsafe { |
4453 | static_assert_rounding!(ROUNDING); |
transmute(vfmaddcsh_mask(
transmute(a),
transmute(b),
transmute(c),
0xff,
4459 | ROUNDING, |
4460 | )) |
4461 | } |
4462 | } |
4463 | |
4464 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4465 | /// store the result in the lower elements of dst using writemask k (elements are copied from a when |
4466 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4467 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4468 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4469 | /// |
4470 | /// Rounding is done according to the rounding parameter, which can be one of: |
4471 | /// |
4472 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4473 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4474 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4475 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4476 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4477 | /// |
4478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch) |
4479 | #[inline ] |
4480 | #[target_feature (enable = "avx512fp16" )] |
4481 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4482 | #[rustc_legacy_const_generics (4)] |
4483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4484 | pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>( |
4485 | a: __m128h, |
4486 | k: __mmask8, |
4487 | b: __m128h, |
4488 | c: __m128h, |
4489 | ) -> __m128h { |
4490 | unsafe { |
4491 | static_assert_rounding!(ROUNDING); |
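// `_mm_mask_move_ss` keeps the freshly computed complex number only when bit 0 of `k`
// is set (otherwise the low 32 bits fall back to `a`) and copies the upper elements
// from `a`, matching the writemask-from-`a` semantics documented above.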
let a: __m128 = transmute(a);
let r: __m128 = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
transmute(_mm_mask_move_ss(a, k, a, r))
4495 | } |
4496 | } |
4497 | |
4498 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4499 | /// store the result in the lower elements of dst using writemask k (elements are copied from c when |
4500 | /// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. |
4501 | /// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, |
4502 | /// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4503 | /// |
4504 | /// Rounding is done according to the rounding parameter, which can be one of: |
4505 | /// |
4506 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4507 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4508 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4509 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4510 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4511 | /// |
4512 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch) |
4513 | #[inline ] |
4514 | #[target_feature (enable = "avx512fp16" )] |
4515 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4516 | #[rustc_legacy_const_generics (4)] |
4517 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4518 | pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>( |
4519 | a: __m128h, |
4520 | b: __m128h, |
4521 | c: __m128h, |
4522 | k: __mmask8, |
4523 | ) -> __m128h { |
4524 | unsafe { |
4525 | static_assert_rounding!(ROUNDING); |
let c: __m128 = transmute(c);
let r: __m128 = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
transmute(_mm_move_ss(c, r))
4529 | } |
4530 | } |
4531 | |
4532 | /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and |
4533 | /// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask |
4534 | /// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each |
4535 | /// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which |
4536 | /// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`. |
4537 | /// |
4538 | /// Rounding is done according to the rounding parameter, which can be one of: |
4539 | /// |
4540 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4541 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4542 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4543 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4544 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4545 | /// |
4546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch) |
4547 | #[inline ] |
4548 | #[target_feature (enable = "avx512fp16" )] |
4549 | #[cfg_attr (test, assert_instr(vfmaddcsh, ROUNDING = 8))] |
4550 | #[rustc_legacy_const_generics (4)] |
4551 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4552 | pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>( |
4553 | k: __mmask8, |
4554 | a: __m128h, |
4555 | b: __m128h, |
4556 | c: __m128h, |
4557 | ) -> __m128h { |
4558 | unsafe { |
4559 | static_assert_rounding!(ROUNDING); |
let a: __m128 = transmute(a);
let r: __m128 = vfmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, LLVM optimizes `vfmaddcsh_maskz` to output an all-zero vector, which is incorrect
4563 | } |
4564 | } |
4565 | |
4566 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4567 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4568 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4569 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4570 | /// |
4571 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch) |
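///
/// # Examples
///
/// A minimal sketch of the conjugate multiply-add (illustrative only; assumes the
/// `avx512fp16` and `avx512vl` target features are available):
///
/// ```ignore
/// // With a = (1 + 2i) and b = (0 + 1i) in every complex slot and c = 0, each
/// // result slot is a * conj(b) = (1 + 2i) * (0 - 1i) = 2 - 1i.
/// let a = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
/// let b = _mm_set_ph(1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0);
/// let c = _mm_setzero_ph();
/// let r = _mm_fcmadd_pch(a, b, c);
/// ```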
4572 | #[inline ] |
4573 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4574 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4575 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4576 | pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
_mm_mask3_fcmadd_pch(a, b, c, 0xff)
4578 | } |
4579 | |
4580 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4581 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4582 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4583 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4584 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4585 | /// |
4586 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch) |
4587 | #[inline ] |
4588 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4589 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4590 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4591 | pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4592 | unsafe { |
let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4595 | } |
4596 | } |
4597 | |
4598 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4599 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4600 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4601 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4602 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4603 | /// |
4604 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch) |
4605 | #[inline ] |
4606 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4607 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4608 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4609 | pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4610 | unsafe { |
transmute(vfcmaddcph_mask3_128(
transmute(a),
transmute(b),
transmute(c),
4615 | k, |
4616 | )) |
4617 | } |
4618 | } |
4619 | |
4620 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4621 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4622 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4623 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4624 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4625 | /// |
4626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch) |
4627 | #[inline ] |
4628 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4629 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4630 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4631 | pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4632 | unsafe { |
transmute(vfcmaddcph_maskz_128(
transmute(a),
transmute(b),
transmute(c),
4637 | k, |
4638 | )) |
4639 | } |
4640 | } |
4641 | |
4642 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4643 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4644 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4645 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4646 | /// |
4647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch) |
4648 | #[inline ] |
4649 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4650 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4651 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4652 | pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
_mm256_mask3_fcmadd_pch(a, b, c, 0xff)
4654 | } |
4655 | |
4656 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4657 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4658 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4659 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4660 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4661 | /// |
4662 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch) |
4663 | #[inline ] |
4664 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4665 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4666 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4667 | pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { |
4668 | unsafe { |
let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4671 | } |
4672 | } |
4673 | |
4674 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4675 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4676 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4677 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4678 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4679 | /// |
4680 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch) |
4681 | #[inline ] |
4682 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4683 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4684 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4685 | pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { |
4686 | unsafe { |
transmute(vfcmaddcph_mask3_256(
transmute(a),
transmute(b),
transmute(c),
4691 | k, |
4692 | )) |
4693 | } |
4694 | } |
4695 | |
4696 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4697 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4698 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4699 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4700 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4701 | /// |
4702 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch) |
4703 | #[inline ] |
4704 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
4705 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4706 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4707 | pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
4708 | unsafe { |
transmute(vfcmaddcph_maskz_256(
transmute(a),
transmute(b),
transmute(c),
4713 | k, |
4714 | )) |
4715 | } |
4716 | } |
4717 | |
4718 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4719 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4720 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4721 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4722 | /// |
4723 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch) |
4724 | #[inline ] |
4725 | #[target_feature (enable = "avx512fp16" )] |
4726 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4727 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4728 | pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4729 | _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4730 | } |
4731 | |
4732 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4733 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4734 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4735 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4736 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4737 | /// |
4738 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch) |
4739 | #[inline ] |
4740 | #[target_feature (enable = "avx512fp16" )] |
4741 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4742 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4743 | pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { |
4744 | _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4745 | } |
4746 | |
4747 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4748 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4749 | /// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4750 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4751 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4752 | /// |
4753 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch) |
4754 | #[inline ] |
4755 | #[target_feature (enable = "avx512fp16" )] |
4756 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4757 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4758 | pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { |
4759 | _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4760 | } |
4761 | |
4762 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4763 | /// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is |
4764 | /// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4765 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4766 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4767 | /// |
4768 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch) |
4769 | #[inline ] |
4770 | #[target_feature (enable = "avx512fp16" )] |
4771 | #[cfg_attr (test, assert_instr(vfcmaddcph))] |
4772 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4773 | pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4774 | _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4775 | } |
4776 | |
4777 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4778 | /// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed |
4779 | /// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number |
4780 | /// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4781 | /// |
4782 | /// Rounding is done according to the rounding parameter, which can be one of: |
4783 | /// |
4784 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4785 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4786 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4787 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4788 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4789 | /// |
4790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch) |
4791 | #[inline ] |
4792 | #[target_feature (enable = "avx512fp16" )] |
4793 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4794 | #[rustc_legacy_const_generics (3)] |
4795 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4796 | pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
4797 | static_assert_rounding!(ROUNDING); |
_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4799 | } |
4800 | |
4801 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4802 | /// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is |
4803 | /// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent |
4804 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4805 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4806 | /// |
4807 | /// Rounding is done according to the rounding parameter, which can be one of: |
4808 | /// |
4809 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4810 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4811 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4812 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4813 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4814 | /// |
4815 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch) |
4816 | #[inline ] |
4817 | #[target_feature (enable = "avx512fp16" )] |
4818 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4819 | #[rustc_legacy_const_generics (4)] |
4820 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4821 | pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>( |
4822 | a: __m512h, |
4823 | k: __mmask16, |
4824 | b: __m512h, |
4825 | c: __m512h, |
4826 | ) -> __m512h { |
4827 | unsafe { |
4828 | static_assert_rounding!(ROUNDING); |
let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
transmute(simd_select_bitmask(k, r, transmute(a)))
4831 | } |
4832 | } |
4833 | |
4834 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4835 | /// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding |
4836 | /// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision |
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4838 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4839 | /// |
4840 | /// Rounding is done according to the rounding parameter, which can be one of: |
4841 | /// |
4842 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4843 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4844 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4845 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4846 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4847 | /// |
4848 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch) |
4849 | #[inline ] |
4850 | #[target_feature (enable = "avx512fp16" )] |
4851 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4852 | #[rustc_legacy_const_generics (4)] |
4853 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4854 | pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>( |
4855 | a: __m512h, |
4856 | b: __m512h, |
4857 | c: __m512h, |
4858 | k: __mmask16, |
4859 | ) -> __m512h { |
4860 | unsafe { |
4861 | static_assert_rounding!(ROUNDING); |
transmute(vfcmaddcph_mask3_512(
transmute(a),
transmute(b),
transmute(c),
4866 | k, |
4867 | ROUNDING, |
4868 | )) |
4869 | } |
4870 | } |
4871 | |
4872 | /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate |
4873 | /// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding |
4874 | /// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision |
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4876 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4877 | /// |
4878 | /// Rounding is done according to the rounding parameter, which can be one of: |
4879 | /// |
4880 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4881 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4882 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4883 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4884 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4885 | /// |
4886 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch) |
4887 | #[inline ] |
4888 | #[target_feature (enable = "avx512fp16" )] |
4889 | #[cfg_attr (test, assert_instr(vfcmaddcph, ROUNDING = 8))] |
4890 | #[rustc_legacy_const_generics (4)] |
4891 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4892 | pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>( |
4893 | k: __mmask16, |
4894 | a: __m512h, |
4895 | b: __m512h, |
4896 | c: __m512h, |
4897 | ) -> __m512h { |
4898 | unsafe { |
4899 | static_assert_rounding!(ROUNDING); |
transmute(vfcmaddcph_maskz_512(
transmute(a),
transmute(b),
transmute(c),
4904 | k, |
4905 | ROUNDING, |
4906 | )) |
4907 | } |
4908 | } |
4909 | |
4910 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4911 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst, |
4912 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
4913 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4914 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4915 | /// |
4916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch) |
4917 | #[inline ] |
4918 | #[target_feature (enable = "avx512fp16" )] |
4919 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4921 | pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4922 | _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
4923 | } |
4924 | |
4925 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4926 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4927 | /// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper |
4928 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4929 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4930 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4931 | /// |
4932 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch) |
4933 | #[inline ] |
4934 | #[target_feature (enable = "avx512fp16" )] |
4935 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4936 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4937 | pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
4938 | _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) |
4939 | } |
4940 | |
4941 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4942 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4943 | /// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper |
4944 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4945 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4946 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4947 | /// |
4948 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch) |
4949 | #[inline ] |
4950 | #[target_feature (enable = "avx512fp16" )] |
4951 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4952 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4953 | pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
4954 | _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) |
4955 | } |
4956 | |
4957 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4958 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
4959 | /// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper |
4960 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
4961 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
4962 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4963 | /// |
4964 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch) |
4965 | #[inline ] |
4966 | #[target_feature (enable = "avx512fp16" )] |
4967 | #[cfg_attr (test, assert_instr(vfcmaddcsh))] |
4968 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4969 | pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4970 | _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) |
4971 | } |
4972 | |
4973 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
4974 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst, |
4975 | /// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is |
4976 | /// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex |
4977 | /// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
4978 | /// |
4979 | /// Rounding is done according to the rounding parameter, which can be one of: |
4980 | /// |
4981 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
4982 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
4983 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
4984 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
4985 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
4986 | /// |
4987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch) |
4988 | #[inline ] |
4989 | #[target_feature (enable = "avx512fp16" )] |
4990 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
4991 | #[rustc_legacy_const_generics (3)] |
4992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
4993 | pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
4994 | unsafe { |
4995 | static_assert_rounding!(ROUNDING); |
        transmute(vfcmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
5003 | } |
5004 | } |
5005 | |
5006 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5007 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
5008 | /// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper |
5009 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
5010 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
5011 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5012 | /// |
5013 | /// Rounding is done according to the rounding parameter, which can be one of: |
5014 | /// |
5015 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5016 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5017 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5018 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5019 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5020 | /// |
5021 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch) |
5022 | #[inline ] |
5023 | #[target_feature (enable = "avx512fp16" )] |
5024 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5025 | #[rustc_legacy_const_generics (4)] |
5026 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5027 | pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>( |
5028 | a: __m128h, |
5029 | k: __mmask8, |
5030 | b: __m128h, |
5031 | c: __m128h, |
5032 | ) -> __m128h { |
5033 | unsafe { |
5034 | static_assert_rounding!(ROUNDING); |
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_mask_move_ss(a, k, a, r))
5038 | } |
5039 | } |
5040 | |
5041 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5042 | /// accumulate to the lower complex number in c, and store the result in the lower elements of dst using |
5043 | /// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper |
5044 | /// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent |
5045 | /// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, |
5046 | /// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5047 | /// |
5048 | /// Rounding is done according to the rounding parameter, which can be one of: |
5049 | /// |
5050 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5051 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5052 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5053 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5054 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5055 | /// |
5056 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch) |
5057 | #[inline ] |
5058 | #[target_feature (enable = "avx512fp16" )] |
5059 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5060 | #[rustc_legacy_const_generics (4)] |
5061 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5062 | pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>( |
5063 | a: __m128h, |
5064 | b: __m128h, |
5065 | c: __m128h, |
5066 | k: __mmask8, |
5067 | ) -> __m128h { |
5068 | unsafe { |
5069 | static_assert_rounding!(ROUNDING); |
        let c: __m128 = transmute(c);
        let r: __m128 = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
5073 | } |
5074 | } |
5075 | |
5076 | /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, |
5077 | /// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding |
5078 | /// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements |
5079 | /// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit) |
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5081 | /// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`. |
5082 | /// |
5083 | /// Rounding is done according to the rounding parameter, which can be one of: |
5084 | /// |
5085 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5086 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5087 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5088 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5089 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5090 | /// |
5091 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch) |
5092 | #[inline ] |
5093 | #[target_feature (enable = "avx512fp16" )] |
5094 | #[cfg_attr (test, assert_instr(vfcmaddcsh, ROUNDING = 8))] |
5095 | #[rustc_legacy_const_generics (4)] |
5096 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5097 | pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>( |
5098 | k: __mmask8, |
5099 | a: __m128h, |
5100 | b: __m128h, |
5101 | c: __m128h, |
5102 | ) -> __m128h { |
5103 | unsafe { |
5104 | static_assert_rounding!(ROUNDING); |
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, LLVM optimizes `vfcmaddcsh_maskz` to output an all-zero vector, which is incorrect
5108 | } |
5109 | } |
5110 | |
5111 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5112 | /// result to packed elements in c, and store the results in dst. |
5113 | /// |
5114 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph) |
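///
/// A minimal sketch of the packed form (illustrative only; assumes nightly with
/// `feature(stdarch_x86_avx512_f16)` and AVX512-FP16/AVX512VL support; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(1.0);
///     // Every lane becomes 2.0 * 3.0 + 1.0 = 7.0, with a single rounding of the result.
///     _mm_fmadd_ph(a, b, c)
/// }
/// ```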
5115 | #[inline ] |
5116 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5117 | #[cfg_attr (test, assert_instr(vfmadd))] |
5118 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5119 | pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(a, b, c) }
5121 | } |
5122 | |
5123 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5124 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5125 | /// from a when the corresponding mask bit is not set). |
5126 | /// |
5127 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph) |
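///
/// A masking sketch (illustrative only; the mask value is arbitrary and `demo` is hypothetical):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Lanes 0 and 2 receive a * b + c; every other lane keeps its value from `a`.
///     _mm_mask_fmadd_ph(a, 0b0000_0101, b, c)
/// }
/// ```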
5128 | #[inline ] |
5129 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5130 | #[cfg_attr (test, assert_instr(vfmadd))] |
5131 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5132 | pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) }
5134 | } |
5135 | |
5136 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5137 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5138 | /// from c when the corresponding mask bit is not set). |
5139 | /// |
5140 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph) |
5141 | #[inline ] |
5142 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5143 | #[cfg_attr (test, assert_instr(vfmadd))] |
5144 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5145 | pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) }
5147 | } |
5148 | |
5149 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5150 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5151 | /// out when the corresponding mask bit is not set). |
5152 | /// |
5153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph) |
5154 | #[inline ] |
5155 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5156 | #[cfg_attr (test, assert_instr(vfmadd))] |
5157 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5158 | pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) }
5160 | } |
5161 | |
5162 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5163 | /// result to packed elements in c, and store the results in dst. |
5164 | /// |
5165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph) |
5166 | #[inline ] |
5167 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5168 | #[cfg_attr (test, assert_instr(vfmadd))] |
5169 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5170 | pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(a, b, c) }
5172 | } |
5173 | |
5174 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5175 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5176 | /// from a when the corresponding mask bit is not set). |
5177 | /// |
5178 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph) |
5179 | #[inline ] |
5180 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5181 | #[cfg_attr (test, assert_instr(vfmadd))] |
5182 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5183 | pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) }
5185 | } |
5186 | |
5187 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5188 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5189 | /// from c when the corresponding mask bit is not set). |
5190 | /// |
5191 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph) |
5192 | #[inline ] |
5193 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5194 | #[cfg_attr (test, assert_instr(vfmadd))] |
5195 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5196 | pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) }
5198 | } |
5199 | |
5200 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5201 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5202 | /// out when the corresponding mask bit is not set). |
5203 | /// |
5204 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph) |
5205 | #[inline ] |
5206 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5207 | #[cfg_attr (test, assert_instr(vfmadd))] |
5208 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5209 | pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) }
5211 | } |
5212 | |
5213 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5214 | /// result to packed elements in c, and store the results in dst. |
5215 | /// |
5216 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph) |
5217 | #[inline ] |
5218 | #[target_feature (enable = "avx512fp16" )] |
5219 | #[cfg_attr (test, assert_instr(vfmadd))] |
5220 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5221 | pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(a, b, c) }
5223 | } |
5224 | |
5225 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5226 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5227 | /// from a when the corresponding mask bit is not set). |
5228 | /// |
5229 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph) |
5230 | #[inline ] |
5231 | #[target_feature (enable = "avx512fp16" )] |
5232 | #[cfg_attr (test, assert_instr(vfmadd))] |
5233 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5234 | pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) }
5236 | } |
5237 | |
5238 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5239 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5240 | /// from c when the corresponding mask bit is not set). |
5241 | /// |
5242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph) |
5243 | #[inline ] |
5244 | #[target_feature (enable = "avx512fp16" )] |
5245 | #[cfg_attr (test, assert_instr(vfmadd))] |
5246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5247 | pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) }
5249 | } |
5250 | |
5251 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5252 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5253 | /// out when the corresponding mask bit is not set). |
5254 | /// |
5255 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph) |
5256 | #[inline ] |
5257 | #[target_feature (enable = "avx512fp16" )] |
5258 | #[cfg_attr (test, assert_instr(vfmadd))] |
5259 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5260 | pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) }
5262 | } |
5263 | |
5264 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5265 | /// result to packed elements in c, and store the results in dst. |
5266 | /// |
5267 | /// Rounding is done according to the rounding parameter, which can be one of: |
5268 | /// |
5269 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5270 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5271 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5272 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5273 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5274 | /// |
5275 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph) |
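///
/// A sketch of selecting an explicit rounding mode (illustrative only; `demo` is a hypothetical
/// wrapper and a nightly toolchain with `feature(stdarch_x86_avx512_f16)` is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
///     // Round toward zero and suppress exceptions instead of using MXCSR.RC.
///     _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```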
5276 | #[inline ] |
5277 | #[target_feature (enable = "avx512fp16" )] |
5278 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5279 | #[rustc_legacy_const_generics (3)] |
5280 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5281 | pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
5282 | unsafe { |
5283 | static_assert_rounding!(ROUNDING); |
5284 | vfmaddph_512(a, b, c, ROUNDING) |
5285 | } |
5286 | } |
5287 | |
5288 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5289 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5290 | /// from a when the corresponding mask bit is not set). |
5291 | /// |
5292 | /// Rounding is done according to the rounding parameter, which can be one of: |
5293 | /// |
5294 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5295 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5296 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5297 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5298 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5299 | /// |
5300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph) |
5301 | #[inline ] |
5302 | #[target_feature (enable = "avx512fp16" )] |
5303 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5304 | #[rustc_legacy_const_generics (4)] |
5305 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5306 | pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>( |
5307 | a: __m512h, |
5308 | k: __mmask32, |
5309 | b: __m512h, |
5310 | c: __m512h, |
5311 | ) -> __m512h { |
5312 | unsafe { |
5313 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), a)
5315 | } |
5316 | } |
5317 | |
5318 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5319 | /// result to packed elements in c, and store the results in dst using writemask k (the element is copied |
5320 | /// from c when the corresponding mask bit is not set). |
5321 | /// |
5322 | /// Rounding is done according to the rounding parameter, which can be one of: |
5323 | /// |
5324 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5325 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5326 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5327 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5328 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5329 | /// |
5330 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph) |
5331 | #[inline ] |
5332 | #[target_feature (enable = "avx512fp16" )] |
5333 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5334 | #[rustc_legacy_const_generics (4)] |
5335 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5336 | pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>( |
5337 | a: __m512h, |
5338 | b: __m512h, |
5339 | c: __m512h, |
5340 | k: __mmask32, |
5341 | ) -> __m512h { |
5342 | unsafe { |
5343 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), c)
5345 | } |
5346 | } |
5347 | |
5348 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate |
5349 | /// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
5350 | /// out when the corresponding mask bit is not set). |
5351 | /// |
5352 | /// Rounding is done according to the rounding parameter, which can be one of: |
5353 | /// |
5354 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5355 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5356 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5357 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5358 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5359 | /// |
5360 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph) |
5361 | #[inline ] |
5362 | #[target_feature (enable = "avx512fp16" )] |
5363 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5364 | #[rustc_legacy_const_generics (4)] |
5365 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5366 | pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>( |
5367 | k: __mmask32, |
5368 | a: __m512h, |
5369 | b: __m512h, |
5370 | c: __m512h, |
5371 | ) -> __m512h { |
5372 | unsafe { |
5373 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(
            k,
            _mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
5379 | } |
5380 | } |
5381 | |
5382 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5383 | /// result to the lower element in c. Store the result in the lower element of dst, and copy the upper |
5384 | /// 7 packed elements from a to the upper elements of dst. |
5385 | /// |
5386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh) |
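///
/// A scalar sketch (illustrative only; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Lane 0 becomes a[0] * b[0] + c[0]; lanes 1..=7 are copied from `a` unchanged.
///     _mm_fmadd_sh(a, b, c)
/// }
/// ```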
5387 | #[inline ] |
5388 | #[target_feature (enable = "avx512fp16" )] |
5389 | #[cfg_attr (test, assert_instr(vfmadd))] |
5390 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5391 | pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5392 | unsafe { |
5393 | let extracta: f16 = simd_extract!(a, 0); |
5394 | let extractb: f16 = simd_extract!(b, 0); |
5395 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(extracta, extractb, extractc);
5397 | simd_insert!(a, 0, r) |
5398 | } |
5399 | } |
5400 | |
5401 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5402 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5403 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5404 | /// upper elements of dst. |
5405 | /// |
5406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh) |
5407 | #[inline ] |
5408 | #[target_feature (enable = "avx512fp16" )] |
5409 | #[cfg_attr (test, assert_instr(vfmadd))] |
5410 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5411 | pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
5412 | unsafe { |
5413 | let mut fmadd: f16 = simd_extract!(a, 0); |
5414 | if k & 1 != 0 { |
5415 | let extractb: f16 = simd_extract!(b, 0); |
5416 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = fmaf16(fmadd, extractb, extractc);
5418 | } |
5419 | simd_insert!(a, 0, fmadd) |
5420 | } |
5421 | } |
5422 | |
5423 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5424 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5425 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5426 | /// upper elements of dst. |
5427 | /// |
5428 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh) |
5429 | #[inline ] |
5430 | #[target_feature (enable = "avx512fp16" )] |
5431 | #[cfg_attr (test, assert_instr(vfmadd))] |
5432 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5433 | pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
5434 | unsafe { |
5435 | let mut fmadd: f16 = simd_extract!(c, 0); |
5436 | if k & 1 != 0 { |
5437 | let extracta: f16 = simd_extract!(a, 0); |
5438 | let extractb: f16 = simd_extract!(b, 0); |
            fmadd = fmaf16(extracta, extractb, fmadd);
5440 | } |
5441 | simd_insert!(c, 0, fmadd) |
5442 | } |
5443 | } |
5444 | |
5445 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5446 | /// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
5447 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5448 | /// upper elements of dst. |
5449 | /// |
5450 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh) |
5451 | #[inline ] |
5452 | #[target_feature (enable = "avx512fp16" )] |
5453 | #[cfg_attr (test, assert_instr(vfmadd))] |
5454 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5455 | pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5456 | unsafe { |
5457 | let mut fmadd: f16 = 0.0; |
5458 | if k & 1 != 0 { |
5459 | let extracta: f16 = simd_extract!(a, 0); |
5460 | let extractb: f16 = simd_extract!(b, 0); |
5461 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = fmaf16(extracta, extractb, extractc);
5463 | } |
5464 | simd_insert!(a, 0, fmadd) |
5465 | } |
5466 | } |
5467 | |
5468 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5469 | /// result to the lower element in c. Store the result in the lower element of dst, and copy the upper |
5470 | /// 7 packed elements from a to the upper elements of dst. |
5471 | /// |
5472 | /// Rounding is done according to the rounding parameter, which can be one of: |
5473 | /// |
5474 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5475 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5476 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5477 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5478 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5479 | /// |
5480 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh) |
5481 | #[inline ] |
5482 | #[target_feature (enable = "avx512fp16" )] |
5483 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5484 | #[rustc_legacy_const_generics (3)] |
5485 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5486 | pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5487 | unsafe { |
5488 | static_assert_rounding!(ROUNDING); |
5489 | let extracta: f16 = simd_extract!(a, 0); |
5490 | let extractb: f16 = simd_extract!(b, 0); |
5491 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5493 | simd_insert!(a, 0, r) |
5494 | } |
5495 | } |
5496 | |
5497 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5498 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5499 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5500 | /// upper elements of dst. |
5501 | /// |
5502 | /// Rounding is done according to the rounding parameter, which can be one of: |
5503 | /// |
5504 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5505 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5506 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5507 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5508 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5509 | /// |
5510 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh) |
5511 | #[inline ] |
5512 | #[target_feature (enable = "avx512fp16" )] |
5513 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5514 | #[rustc_legacy_const_generics (4)] |
5515 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5516 | pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>( |
5517 | a: __m128h, |
5518 | k: __mmask8, |
5519 | b: __m128h, |
5520 | c: __m128h, |
5521 | ) -> __m128h { |
5522 | unsafe { |
5523 | static_assert_rounding!(ROUNDING); |
5524 | let mut fmadd: f16 = simd_extract!(a, 0); |
5525 | if k & 1 != 0 { |
5526 | let extractb: f16 = simd_extract!(b, 0); |
5527 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
5529 | } |
5530 | simd_insert!(a, 0, fmadd) |
5531 | } |
5532 | } |
5533 | |
5534 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5535 | /// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element |
5536 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the |
5537 | /// upper elements of dst. |
5538 | /// |
5539 | /// Rounding is done according to the rounding parameter, which can be one of: |
5540 | /// |
5541 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5542 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5543 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5544 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5545 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5546 | /// |
5547 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh) |
5548 | #[inline ] |
5549 | #[target_feature (enable = "avx512fp16" )] |
5550 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5551 | #[rustc_legacy_const_generics (4)] |
5552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5553 | pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>( |
5554 | a: __m128h, |
5555 | b: __m128h, |
5556 | c: __m128h, |
5557 | k: __mmask8, |
5558 | ) -> __m128h { |
5559 | unsafe { |
5560 | static_assert_rounding!(ROUNDING); |
5561 | let mut fmadd: f16 = simd_extract!(c, 0); |
5562 | if k & 1 != 0 { |
5563 | let extracta: f16 = simd_extract!(a, 0); |
5564 | let extractb: f16 = simd_extract!(b, 0); |
            fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
5566 | } |
5567 | simd_insert!(c, 0, fmadd) |
5568 | } |
5569 | } |
5570 | |
5571 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate |
5572 | /// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
5573 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
5574 | /// upper elements of dst. |
5575 | /// |
5576 | /// Rounding is done according to the rounding parameter, which can be one of: |
5577 | /// |
5578 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5579 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5580 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5581 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5582 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5583 | /// |
5584 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh) |
5585 | #[inline ] |
5586 | #[target_feature (enable = "avx512fp16" )] |
5587 | #[cfg_attr (test, assert_instr(vfmadd, ROUNDING = 8))] |
5588 | #[rustc_legacy_const_generics (4)] |
5589 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5590 | pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>( |
5591 | k: __mmask8, |
5592 | a: __m128h, |
5593 | b: __m128h, |
5594 | c: __m128h, |
5595 | ) -> __m128h { |
5596 | unsafe { |
5597 | static_assert_rounding!(ROUNDING); |
5598 | let mut fmadd: f16 = 0.0; |
5599 | if k & 1 != 0 { |
5600 | let extracta: f16 = simd_extract!(a, 0); |
5601 | let extractb: f16 = simd_extract!(b, 0); |
5602 | let extractc: f16 = simd_extract!(c, 0); |
            fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5604 | } |
5605 | simd_insert!(a, 0, fmadd) |
5606 | } |
5607 | } |
5608 | |
5609 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5610 | /// in c from the intermediate result, and store the results in dst. |
5612 | /// |
5613 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph) |
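///
/// A minimal sketch (illustrative only; assumes nightly with `feature(stdarch_x86_avx512_f16)`
/// and AVX512-FP16/AVX512VL support; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(1.0);
///     // Every lane becomes 2.0 * 3.0 - 1.0 = 5.0.
///     _mm_fmsub_ph(a, b, c)
/// }
/// ```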
5614 | #[inline ] |
5615 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5616 | #[cfg_attr (test, assert_instr(vfmsub))] |
5617 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5618 | pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5620 | } |
5621 | |
5622 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5623 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5624 | /// from a when the corresponding mask bit is not set). |
5625 | /// |
5626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph) |
5627 | #[inline ] |
5628 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5629 | #[cfg_attr (test, assert_instr(vfmsub))] |
5630 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5631 | pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) }
5633 | } |
5634 | |
5635 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5636 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5637 | /// from c when the corresponding mask bit is not set). |
5638 | /// |
5639 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph) |
5640 | #[inline ] |
5641 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5642 | #[cfg_attr (test, assert_instr(vfmsub))] |
5643 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5644 | pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) }
5646 | } |
5647 | |
5648 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5649 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5650 | /// out when the corresponding mask bit is not set). |
5651 | /// |
5652 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph) |
5653 | #[inline ] |
5654 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5655 | #[cfg_attr (test, assert_instr(vfmsub))] |
5656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5657 | pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) }
5659 | } |
5660 | |
5661 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5662 | /// in c from the intermediate result, and store the results in dst. |
5663 | /// |
5664 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph) |
5665 | #[inline ] |
5666 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5667 | #[cfg_attr (test, assert_instr(vfmsub))] |
5668 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5669 | pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5671 | } |
5672 | |
5673 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5674 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5675 | /// from a when the corresponding mask bit is not set). |
5676 | /// |
5677 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph) |
5678 | #[inline ] |
5679 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5680 | #[cfg_attr (test, assert_instr(vfmsub))] |
5681 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5682 | pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) }
5684 | } |
5685 | |
5686 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5687 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5688 | /// from c when the corresponding mask bit is not set). |
5689 | /// |
5690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph) |
5691 | #[inline ] |
5692 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5693 | #[cfg_attr (test, assert_instr(vfmsub))] |
5694 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5695 | pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) }
5697 | } |
5698 | |
5699 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5700 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5701 | /// out when the corresponding mask bit is not set). |
5702 | /// |
5703 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph) |
5704 | #[inline ] |
5705 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
5706 | #[cfg_attr (test, assert_instr(vfmsub))] |
5707 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5708 | pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) }
5710 | } |
5711 | |
5712 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5713 | /// in c from the intermediate result, and store the results in dst. |
5714 | /// |
5715 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph) |
5716 | #[inline ] |
5717 | #[target_feature (enable = "avx512fp16" )] |
5718 | #[cfg_attr (test, assert_instr(vfmsub))] |
5719 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5720 | pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(a, b, simd_neg(c)) }
5722 | } |
5723 | |
5724 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5725 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5726 | /// from a when the corresponding mask bit is not set). |
5727 | /// |
5728 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph) |
5729 | #[inline ] |
5730 | #[target_feature (enable = "avx512fp16" )] |
5731 | #[cfg_attr (test, assert_instr(vfmsub))] |
5732 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5733 | pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) }
5735 | } |
5736 | |
5737 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5738 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5739 | /// from c when the corresponding mask bit is not set). |
5740 | /// |
5741 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph) |
5742 | #[inline ] |
5743 | #[target_feature (enable = "avx512fp16" )] |
5744 | #[cfg_attr (test, assert_instr(vfmsub))] |
5745 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5746 | pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) }
5748 | } |
5749 | |
5750 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5751 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5752 | /// out when the corresponding mask bit is not set). |
5753 | /// |
5754 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph) |
5755 | #[inline ] |
5756 | #[target_feature (enable = "avx512fp16" )] |
5757 | #[cfg_attr (test, assert_instr(vfmsub))] |
5758 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5759 | pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) }
5761 | } |
5762 | |
5763 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5764 | /// in c from the intermediate result, and store the results in dst. |
5765 | /// |
5766 | /// Rounding is done according to the rounding parameter, which can be one of: |
5767 | /// |
5768 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5769 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5770 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5771 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5772 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5773 | /// |
5774 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph) |
5775 | #[inline ] |
5776 | #[target_feature (enable = "avx512fp16" )] |
5777 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5778 | #[rustc_legacy_const_generics (3)] |
5779 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5780 | pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
5781 | unsafe { |
5782 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(a, b, simd_neg(c), ROUNDING)
5784 | } |
5785 | } |
5786 | |
5787 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5788 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5789 | /// from a when the corresponding mask bit is not set). |
5790 | /// |
5791 | /// Rounding is done according to the rounding parameter, which can be one of: |
5792 | /// |
5793 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5794 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5795 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5796 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5797 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5798 | /// |
5799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph) |
5800 | #[inline ] |
5801 | #[target_feature (enable = "avx512fp16" )] |
5802 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5803 | #[rustc_legacy_const_generics (4)] |
5804 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5805 | pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>( |
5806 | a: __m512h, |
5807 | k: __mmask32, |
5808 | b: __m512h, |
5809 | c: __m512h, |
5810 | ) -> __m512h { |
5811 | unsafe { |
5812 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
5814 | } |
5815 | } |
5816 | |
5817 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5818 | /// in c from the intermediate result, and store the results in dst using writemask k (the element is copied |
5819 | /// from c when the corresponding mask bit is not set). |
5820 | /// |
5821 | /// Rounding is done according to the rounding parameter, which can be one of: |
5822 | /// |
5823 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5824 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5825 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5826 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5827 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5828 | /// |
5829 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph) |
5830 | #[inline ] |
5831 | #[target_feature (enable = "avx512fp16" )] |
5832 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5833 | #[rustc_legacy_const_generics (4)] |
5834 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5835 | pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>( |
5836 | a: __m512h, |
5837 | b: __m512h, |
5838 | c: __m512h, |
5839 | k: __mmask32, |
5840 | ) -> __m512h { |
5841 | unsafe { |
5842 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
5844 | } |
5845 | } |
5846 | |
5847 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
5848 | /// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed |
5849 | /// out when the corresponding mask bit is not set). |
5850 | /// |
5851 | /// Rounding is done according to the rounding parameter, which can be one of: |
5852 | /// |
5853 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5854 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5855 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5856 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5857 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5858 | /// |
5859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph) |
5860 | #[inline ] |
5861 | #[target_feature (enable = "avx512fp16" )] |
5862 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5863 | #[rustc_legacy_const_generics (4)] |
5864 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5865 | pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>( |
5866 | k: __mmask32, |
5867 | a: __m512h, |
5868 | b: __m512h, |
5869 | c: __m512h, |
5870 | ) -> __m512h { |
5871 | unsafe { |
5872 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(
            k,
            _mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
5878 | } |
5879 | } |
5880 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5883 | /// 7 packed elements from a to the upper elements of dst. |
5884 | /// |
5885 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh) |
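///
/// A scalar sketch (illustrative only; `demo` is a hypothetical wrapper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Lane 0 becomes a[0] * b[0] - c[0]; lanes 1..=7 are copied from `a`.
///     _mm_fmsub_sh(a, b, c)
/// }
/// ```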
5886 | #[inline ] |
5887 | #[target_feature (enable = "avx512fp16" )] |
5888 | #[cfg_attr (test, assert_instr(vfmsub))] |
5889 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5890 | pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5891 | unsafe { |
5892 | let extracta: f16 = simd_extract!(a, 0); |
5893 | let extractb: f16 = simd_extract!(b, 0); |
5894 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(extracta, extractb, -extractc);
5896 | simd_insert!(a, 0, r) |
5897 | } |
5898 | } |
5899 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
5904 | /// |
5905 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh) |
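///
/// # Examples
///
/// An illustrative sketch of the writemask behaviour (not from the original source; same toolchain
/// and CPU assumptions as the example above):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m128h, b: __m128h, c: __m128h) -> (__m128h, __m128h) {
///     // Mask bit 0 set: lower lane holds a[0] * b[0] - c[0].
///     let fused = _mm_mask_fmsub_sh(a, 0b1, b, c);
///     // Mask bit 0 clear: lower lane is copied unchanged from `a`.
///     let passthrough = _mm_mask_fmsub_sh(a, 0b0, b, c);
///     (fused, passthrough)
/// }
/// ```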
5906 | #[inline ] |
5907 | #[target_feature (enable = "avx512fp16" )] |
5908 | #[cfg_attr (test, assert_instr(vfmsub))] |
5909 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5910 | pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
5911 | unsafe { |
5912 | let mut fmsub: f16 = simd_extract!(a, 0); |
5913 | if k & 1 != 0 { |
5914 | let extractb: f16 = simd_extract!(b, 0); |
5915 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = fmaf16(fmsub, extractb, -extractc);
5917 | } |
5918 | simd_insert!(a, 0, fmsub) |
5919 | } |
5920 | } |
5921 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed elements from c to
/// the upper elements of dst.
5926 | /// |
5927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh) |
5928 | #[inline ] |
5929 | #[target_feature (enable = "avx512fp16" )] |
5930 | #[cfg_attr (test, assert_instr(vfmsub))] |
5931 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5932 | pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
5933 | unsafe { |
5934 | let mut fmsub: f16 = simd_extract!(c, 0); |
5935 | if k & 1 != 0 { |
5936 | let extracta: f16 = simd_extract!(a, 0); |
5937 | let extractb: f16 = simd_extract!(b, 0); |
            fmsub = fmaf16(extracta, extractb, -fmsub);
5939 | } |
5940 | simd_insert!(c, 0, fmsub) |
5941 | } |
5942 | } |
5943 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
5948 | /// |
5949 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh) |
5950 | #[inline ] |
5951 | #[target_feature (enable = "avx512fp16" )] |
5952 | #[cfg_attr (test, assert_instr(vfmsub))] |
5953 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5954 | pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5955 | unsafe { |
5956 | let mut fmsub: f16 = 0.0; |
5957 | if k & 1 != 0 { |
5958 | let extracta: f16 = simd_extract!(a, 0); |
5959 | let extractb: f16 = simd_extract!(b, 0); |
5960 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = fmaf16(extracta, extractb, -extractc);
5962 | } |
5963 | simd_insert!(a, 0, fmsub) |
5964 | } |
5965 | } |
5966 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
5970 | /// |
5971 | /// Rounding is done according to the rounding parameter, which can be one of: |
5972 | /// |
5973 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
5974 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
5975 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
5976 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
5977 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
5978 | /// |
5979 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh) |
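///
/// # Examples
///
/// A sketch of selecting an explicit rounding mode through the const parameter (illustrative only;
/// same toolchain and CPU assumptions as the examples above):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Round toward zero and suppress exceptions for the lower-lane fmsub.
///     _mm_fmsub_round_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```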
5980 | #[inline ] |
5981 | #[target_feature (enable = "avx512fp16" )] |
5982 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
5983 | #[rustc_legacy_const_generics (3)] |
5984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
5985 | pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
5986 | unsafe { |
5987 | static_assert_rounding!(ROUNDING); |
5988 | let extracta: f16 = simd_extract!(a, 0); |
5989 | let extractb: f16 = simd_extract!(b, 0); |
5990 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
5992 | simd_insert!(a, 0, r) |
5993 | } |
5994 | } |
5995 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
6000 | /// |
6001 | /// Rounding is done according to the rounding parameter, which can be one of: |
6002 | /// |
6003 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6004 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6005 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6006 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6007 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6008 | /// |
6009 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh) |
6010 | #[inline ] |
6011 | #[target_feature (enable = "avx512fp16" )] |
6012 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6013 | #[rustc_legacy_const_generics (4)] |
6014 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6015 | pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>( |
6016 | a: __m128h, |
6017 | k: __mmask8, |
6018 | b: __m128h, |
6019 | c: __m128h, |
6020 | ) -> __m128h { |
6021 | unsafe { |
6022 | static_assert_rounding!(ROUNDING); |
6023 | let mut fmsub: f16 = simd_extract!(a, 0); |
6024 | if k & 1 != 0 { |
6025 | let extractb: f16 = simd_extract!(b, 0); |
6026 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING);
6028 | } |
6029 | simd_insert!(a, 0, fmsub) |
6030 | } |
6031 | } |
6032 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed elements from c to
/// the upper elements of dst.
6037 | /// |
6038 | /// Rounding is done according to the rounding parameter, which can be one of: |
6039 | /// |
6040 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6041 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6042 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6043 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6044 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6045 | /// |
6046 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh) |
6047 | #[inline ] |
6048 | #[target_feature (enable = "avx512fp16" )] |
6049 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6050 | #[rustc_legacy_const_generics (4)] |
6051 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6052 | pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>( |
6053 | a: __m128h, |
6054 | b: __m128h, |
6055 | c: __m128h, |
6056 | k: __mmask8, |
6057 | ) -> __m128h { |
6058 | unsafe { |
6059 | static_assert_rounding!(ROUNDING); |
6060 | let mut fmsub: f16 = simd_extract!(c, 0); |
6061 | if k & 1 != 0 { |
6062 | let extracta: f16 = simd_extract!(a, 0); |
6063 | let extractb: f16 = simd_extract!(b, 0); |
            fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING);
6065 | } |
6066 | simd_insert!(c, 0, fmsub) |
6067 | } |
6068 | } |
6069 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to
/// the upper elements of dst.
6074 | /// |
6075 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh) |
6076 | #[inline ] |
6077 | #[target_feature (enable = "avx512fp16" )] |
6078 | #[cfg_attr (test, assert_instr(vfmsub, ROUNDING = 8))] |
6079 | #[rustc_legacy_const_generics (4)] |
6080 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6081 | pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>( |
6082 | k: __mmask8, |
6083 | a: __m128h, |
6084 | b: __m128h, |
6085 | c: __m128h, |
6086 | ) -> __m128h { |
6087 | unsafe { |
6088 | static_assert_rounding!(ROUNDING); |
6089 | let mut fmsub: f16 = 0.0; |
6090 | if k & 1 != 0 { |
6091 | let extracta: f16 = simd_extract!(a, 0); |
6092 | let extractb: f16 = simd_extract!(b, 0); |
6093 | let extractc: f16 = simd_extract!(c, 0); |
            fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
6095 | } |
6096 | simd_insert!(a, 0, fmsub) |
6097 | } |
6098 | } |
6099 | |
6100 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6101 | /// result from packed elements in c, and store the results in dst. |
6102 | /// |
6103 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph) |
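///
/// # Examples
///
/// A sketch of the per-lane computation `c - a * b` (illustrative only, not part of the original
/// source; assumes `avx512fp16` and `avx512vl` are available):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(10.0);
///     // Every lane becomes 10.0 - 2.0 * 3.0 = 4.0.
///     _mm_fnmadd_ph(a, b, c)
/// }
/// ```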
6104 | #[inline ] |
6105 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6106 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6107 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6108 | pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(simd_neg(a), b, c) }
6110 | } |
6111 | |
6112 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6113 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6114 | /// from a when the corresponding mask bit is not set). |
6115 | /// |
6116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph) |
6117 | #[inline ] |
6118 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6119 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6120 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6121 | pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) }
6123 | } |
6124 | |
6125 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6126 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6127 | /// from c when the corresponding mask bit is not set). |
6128 | /// |
6129 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph) |
6130 | #[inline ] |
6131 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6132 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6133 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6134 | pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
6136 | } |
6137 | |
6138 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6139 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6140 | /// out when the corresponding mask bit is not set). |
6141 | /// |
6142 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph) |
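///
/// # Examples
///
/// A sketch of the zeromask behaviour (illustrative only; same feature assumptions as the
/// example above):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
///     // Only lanes 0 and 2 receive c - a * b; every other lane is zeroed.
///     _mm_maskz_fnmadd_ph(0b0000_0101, a, b, c)
/// }
/// ```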
6143 | #[inline ] |
6144 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6145 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6146 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6147 | pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
6149 | } |
6150 | |
6151 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6152 | /// result from packed elements in c, and store the results in dst. |
6153 | /// |
6154 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph) |
6155 | #[inline ] |
6156 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6157 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6158 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6159 | pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(simd_neg(a), b, c) }
6161 | } |
6162 | |
6163 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6164 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6165 | /// from a when the corresponding mask bit is not set). |
6166 | /// |
6167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph) |
6168 | #[inline ] |
6169 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6170 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6171 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6172 | pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
6174 | } |
6175 | |
6176 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6177 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6178 | /// from c when the corresponding mask bit is not set). |
6179 | /// |
6180 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph) |
6181 | #[inline ] |
6182 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6183 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6184 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6185 | pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
6187 | } |
6188 | |
6189 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6190 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6191 | /// out when the corresponding mask bit is not set). |
6192 | /// |
6193 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph) |
6194 | #[inline ] |
6195 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6196 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6197 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6198 | pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
6200 | } |
6201 | |
6202 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6203 | /// result from packed elements in c, and store the results in dst. |
6204 | /// |
6205 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph) |
6206 | #[inline ] |
6207 | #[target_feature (enable = "avx512fp16" )] |
6208 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6209 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6210 | pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(simd_neg(a), b, c) }
6212 | } |
6213 | |
6214 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6215 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6216 | /// from a when the corresponding mask bit is not set). |
6217 | /// |
6218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph) |
6219 | #[inline ] |
6220 | #[target_feature (enable = "avx512fp16" )] |
6221 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6222 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6223 | pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
6225 | } |
6226 | |
6227 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6228 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6229 | /// from c when the corresponding mask bit is not set). |
6230 | /// |
6231 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph) |
6232 | #[inline ] |
6233 | #[target_feature (enable = "avx512fp16" )] |
6234 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6236 | pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
6238 | } |
6239 | |
6240 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6241 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6242 | /// out when the corresponding mask bit is not set). |
6243 | /// |
6244 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph) |
6245 | #[inline ] |
6246 | #[target_feature (enable = "avx512fp16" )] |
6247 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6248 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6249 | pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
6251 | } |
6252 | |
6253 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6254 | /// result from packed elements in c, and store the results in dst. |
6255 | /// |
6256 | /// Rounding is done according to the rounding parameter, which can be one of: |
6257 | /// |
6258 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6259 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6260 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6261 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6262 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6263 | /// |
6264 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph) |
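///
/// # Examples
///
/// A sketch of a full-width fnmadd with an explicit rounding mode (illustrative only, not part of
/// the original source; assumes `avx512fp16`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
///     // Each lane becomes c - a * b, rounded toward negative infinity.
///     _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```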
6265 | #[inline ] |
6266 | #[target_feature (enable = "avx512fp16" )] |
6267 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6268 | #[rustc_legacy_const_generics (3)] |
6269 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6270 | pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
6271 | unsafe { |
6272 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(simd_neg(a), b, c, ROUNDING)
6274 | } |
6275 | } |
6276 | |
6277 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6278 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6279 | /// from a when the corresponding mask bit is not set). |
6280 | /// |
6281 | /// Rounding is done according to the rounding parameter, which can be one of: |
6282 | /// |
6283 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6284 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6285 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6286 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6287 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6288 | /// |
6289 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph) |
6290 | #[inline ] |
6291 | #[target_feature (enable = "avx512fp16" )] |
6292 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6293 | #[rustc_legacy_const_generics (4)] |
6294 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6295 | pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>( |
6296 | a: __m512h, |
6297 | k: __mmask32, |
6298 | b: __m512h, |
6299 | c: __m512h, |
6300 | ) -> __m512h { |
6301 | unsafe { |
6302 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
6304 | } |
6305 | } |
6306 | |
6307 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6308 | /// result from packed elements in c, and store the results in dst using writemask k (the element is copied |
6309 | /// from c when the corresponding mask bit is not set). |
6310 | /// |
6311 | /// Rounding is done according to the rounding parameter, which can be one of: |
6312 | /// |
6313 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6314 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6315 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6316 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6317 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6318 | /// |
6319 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph) |
6320 | #[inline ] |
6321 | #[target_feature (enable = "avx512fp16" )] |
6322 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6323 | #[rustc_legacy_const_generics (4)] |
6324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6325 | pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>( |
6326 | a: __m512h, |
6327 | b: __m512h, |
6328 | c: __m512h, |
6329 | k: __mmask32, |
6330 | ) -> __m512h { |
6331 | unsafe { |
6332 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
6334 | } |
6335 | } |
6336 | |
6337 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate |
6338 | /// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed |
6339 | /// out when the corresponding mask bit is not set). |
6340 | /// |
6341 | /// Rounding is done according to the rounding parameter, which can be one of: |
6342 | /// |
6343 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6344 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6345 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6346 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6347 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6348 | /// |
6349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph) |
6350 | #[inline ] |
6351 | #[target_feature (enable = "avx512fp16" )] |
6352 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6353 | #[rustc_legacy_const_generics (4)] |
6354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6355 | pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>( |
6356 | k: __mmask32, |
6357 | a: __m512h, |
6358 | b: __m512h, |
6359 | c: __m512h, |
6360 | ) -> __m512h { |
6361 | unsafe { |
6362 | static_assert_rounding!(ROUNDING); |
6363 | simd_select_bitmask( |
            k,
            _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
6367 | ) |
6368 | } |
6369 | } |
6370 | |
6371 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6372 | /// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed |
6373 | /// elements from a to the upper elements of dst. |
6374 | /// |
6375 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh) |
6376 | #[inline ] |
6377 | #[target_feature (enable = "avx512fp16" )] |
6378 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6379 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6380 | pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6381 | unsafe { |
6382 | let extracta: f16 = simd_extract!(a, 0); |
6383 | let extractb: f16 = simd_extract!(b, 0); |
6384 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(-extracta, extractb, extractc);
6386 | simd_insert!(a, 0, r) |
6387 | } |
6388 | } |
6389 | |
6390 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6391 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6392 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6393 | /// elements of dst. |
6394 | /// |
6395 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh) |
6396 | #[inline ] |
6397 | #[target_feature (enable = "avx512fp16" )] |
6398 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6399 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6400 | pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
6401 | unsafe { |
6402 | let mut fnmadd: f16 = simd_extract!(a, 0); |
6403 | if k & 1 != 0 { |
6404 | let extractb: f16 = simd_extract!(b, 0); |
6405 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = fmaf16(-fnmadd, extractb, extractc);
6407 | } |
6408 | simd_insert!(a, 0, fnmadd) |
6409 | } |
6410 | } |
6411 | |
6412 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6413 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6414 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6415 | /// elements of dst. |
6416 | /// |
6417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh) |
6418 | #[inline ] |
6419 | #[target_feature (enable = "avx512fp16" )] |
6420 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6421 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6422 | pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
6423 | unsafe { |
6424 | let mut fnmadd: f16 = simd_extract!(c, 0); |
6425 | if k & 1 != 0 { |
6426 | let extracta: f16 = simd_extract!(a, 0); |
6427 | let extractb: f16 = simd_extract!(b, 0); |
            fnmadd = fmaf16(-extracta, extractb, fnmadd);
6429 | } |
6430 | simd_insert!(c, 0, fnmadd) |
6431 | } |
6432 | } |
6433 | |
6434 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6435 | /// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
6436 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6437 | /// elements of dst. |
6438 | /// |
6439 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh) |
6440 | #[inline ] |
6441 | #[target_feature (enable = "avx512fp16" )] |
6442 | #[cfg_attr (test, assert_instr(vfnmadd))] |
6443 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6444 | pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6445 | unsafe { |
6446 | let mut fnmadd: f16 = 0.0; |
6447 | if k & 1 != 0 { |
6448 | let extracta: f16 = simd_extract!(a, 0); |
6449 | let extractb: f16 = simd_extract!(b, 0); |
6450 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = fmaf16(-extracta, extractb, extractc);
6452 | } |
6453 | simd_insert!(a, 0, fnmadd) |
6454 | } |
6455 | } |
6456 | |
6457 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6458 | /// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed |
6459 | /// elements from a to the upper elements of dst. |
6460 | /// |
6461 | /// Rounding is done according to the rounding parameter, which can be one of: |
6462 | /// |
6463 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6464 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6465 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6466 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6467 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6468 | /// |
6469 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh) |
6470 | #[inline ] |
6471 | #[target_feature (enable = "avx512fp16" )] |
6472 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6473 | #[rustc_legacy_const_generics (3)] |
6474 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6475 | pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6476 | unsafe { |
6477 | static_assert_rounding!(ROUNDING); |
6478 | let extracta: f16 = simd_extract!(a, 0); |
6479 | let extractb: f16 = simd_extract!(b, 0); |
6480 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6482 | simd_insert!(a, 0, r) |
6483 | } |
6484 | } |
6485 | |
6486 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6487 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6488 | /// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6489 | /// elements of dst. |
6490 | /// |
6491 | /// Rounding is done according to the rounding parameter, which can be one of: |
6492 | /// |
6493 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6494 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6495 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6496 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6497 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6498 | /// |
6499 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh) |
6500 | #[inline ] |
6501 | #[target_feature (enable = "avx512fp16" )] |
6502 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6503 | #[rustc_legacy_const_generics (4)] |
6504 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6505 | pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>( |
6506 | a: __m128h, |
6507 | k: __mmask8, |
6508 | b: __m128h, |
6509 | c: __m128h, |
6510 | ) -> __m128h { |
6511 | unsafe { |
6512 | static_assert_rounding!(ROUNDING); |
6513 | let mut fnmadd: f16 = simd_extract!(a, 0); |
6514 | if k & 1 != 0 { |
6515 | let extractb: f16 = simd_extract!(b, 0); |
6516 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
6518 | } |
6519 | simd_insert!(a, 0, fnmadd) |
6520 | } |
6521 | } |
6522 | |
6523 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6524 | /// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element |
6525 | /// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper |
6526 | /// elements of dst. |
6527 | /// |
6528 | /// Rounding is done according to the rounding parameter, which can be one of: |
6529 | /// |
6530 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6531 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6532 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6533 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6534 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6535 | /// |
6536 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh) |
6537 | #[inline ] |
6538 | #[target_feature (enable = "avx512fp16" )] |
6539 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6540 | #[rustc_legacy_const_generics (4)] |
6541 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6542 | pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>( |
6543 | a: __m128h, |
6544 | b: __m128h, |
6545 | c: __m128h, |
6546 | k: __mmask8, |
6547 | ) -> __m128h { |
6548 | unsafe { |
6549 | static_assert_rounding!(ROUNDING); |
6550 | let mut fnmadd: f16 = simd_extract!(c, 0); |
6551 | if k & 1 != 0 { |
6552 | let extracta: f16 = simd_extract!(a, 0); |
6553 | let extractb: f16 = simd_extract!(b, 0); |
            fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
6555 | } |
6556 | simd_insert!(c, 0, fnmadd) |
6557 | } |
6558 | } |
6559 | |
6560 | /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate |
6561 | /// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element |
6562 | /// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
6563 | /// elements of dst. |
6564 | /// |
6565 | /// Rounding is done according to the rounding parameter, which can be one of: |
6566 | /// |
6567 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6568 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6569 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6570 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6571 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6572 | /// |
6573 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh) |
6574 | #[inline ] |
6575 | #[target_feature (enable = "avx512fp16" )] |
6576 | #[cfg_attr (test, assert_instr(vfnmadd, ROUNDING = 8))] |
6577 | #[rustc_legacy_const_generics (4)] |
6578 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6579 | pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>( |
6580 | k: __mmask8, |
6581 | a: __m128h, |
6582 | b: __m128h, |
6583 | c: __m128h, |
6584 | ) -> __m128h { |
6585 | unsafe { |
6586 | static_assert_rounding!(ROUNDING); |
6587 | let mut fnmadd: f16 = 0.0; |
6588 | if k & 1 != 0 { |
6589 | let extracta: f16 = simd_extract!(a, 0); |
6590 | let extractb: f16 = simd_extract!(b, 0); |
6591 | let extractc: f16 = simd_extract!(c, 0); |
            fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6593 | } |
6594 | simd_insert!(a, 0, fnmadd) |
6595 | } |
6596 | } |
6597 | |
6598 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6599 | /// in c from the negated intermediate result, and store the results in dst. |
6600 | /// |
6601 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph) |
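///
/// # Examples
///
/// A sketch of the per-lane computation `-(a * b) - c` (illustrative only, not part of the original
/// source; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set1_ph(3.0);
///     let c = _mm_set1_ph(1.0);
///     // Every lane becomes -(2.0 * 3.0) - 1.0 = -7.0.
///     _mm_fnmsub_ph(a, b, c)
/// }
/// ```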
6602 | #[inline ] |
6603 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6604 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6605 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6606 | pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6608 | } |
6609 | |
6610 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6611 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6612 | /// copied from a when the corresponding mask bit is not set). |
6613 | /// |
6614 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph) |
6615 | #[inline ] |
6616 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6617 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6618 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6619 | pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
6621 | } |
6622 | |
6623 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6624 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6625 | /// copied from c when the corresponding mask bit is not set). |
6626 | /// |
6627 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph) |
6628 | #[inline ] |
6629 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6630 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6631 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6632 | pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
6634 | } |
6635 | |
6636 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6637 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6638 | /// zeroed out when the corresponding mask bit is not set). |
6639 | /// |
6640 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph) |
6641 | #[inline ] |
6642 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6643 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6644 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6645 | pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
6647 | } |
6648 | |
6649 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6650 | /// in c from the negated intermediate result, and store the results in dst. |
6651 | /// |
6652 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph) |
6653 | #[inline ] |
6654 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6655 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6657 | pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6659 | } |
6660 | |
6661 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6662 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6663 | /// copied from a when the corresponding mask bit is not set). |
6664 | /// |
6665 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph) |
6666 | #[inline ] |
6667 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6668 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6669 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6670 | pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
6672 | } |
6673 | |
6674 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6675 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6676 | /// copied from c when the corresponding mask bit is not set). |
6677 | /// |
6678 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph) |
6679 | #[inline ] |
6680 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6681 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6682 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6683 | pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
6685 | } |
6686 | |
6687 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6688 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6689 | /// zeroed out when the corresponding mask bit is not set). |
6690 | /// |
6691 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph) |
6692 | #[inline ] |
6693 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
6694 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6696 | pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
6698 | } |
6699 | |
6700 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6701 | /// in c from the negated intermediate result, and store the results in dst. |
6702 | /// |
6703 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph) |
6704 | #[inline ] |
6705 | #[target_feature (enable = "avx512fp16" )] |
6706 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6707 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6708 | pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6710 | } |
6711 | |
6712 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6713 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6714 | /// copied from a when the corresponding mask bit is not set). |
6715 | /// |
6716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph) |
6717 | #[inline ] |
6718 | #[target_feature (enable = "avx512fp16" )] |
6719 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6720 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6721 | pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
6723 | } |
6724 | |
6725 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6726 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6727 | /// copied from c when the corresponding mask bit is not set). |
6728 | /// |
6729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph) |
6730 | #[inline ] |
6731 | #[target_feature (enable = "avx512fp16" )] |
6732 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6733 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6734 | pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
6736 | } |
6737 | |
6738 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6739 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6740 | /// zeroed out when the corresponding mask bit is not set). |
6741 | /// |
6742 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph) |
6743 | #[inline ] |
6744 | #[target_feature (enable = "avx512fp16" )] |
6745 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6746 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6747 | pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
6749 | } |
6750 | |
6751 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6752 | /// in c from the negated intermediate result, and store the results in dst. |
6753 | /// |
6754 | /// Rounding is done according to the rounding parameter, which can be one of: |
6755 | /// |
6756 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6757 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6758 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6759 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6760 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6761 | /// |
6762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph) |
6763 | #[inline ] |
6764 | #[target_feature (enable = "avx512fp16" )] |
6765 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6766 | #[rustc_legacy_const_generics (3)] |
6767 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6768 | pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
6769 | unsafe { |
6770 | static_assert_rounding!(ROUNDING); |
        vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
6772 | } |
6773 | } |
6774 | |
6775 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6776 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6777 | /// copied from a when the corresponding mask bit is not set). |
6778 | /// |
6779 | /// Rounding is done according to the rounding parameter, which can be one of: |
6780 | /// |
6781 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6782 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6783 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6784 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6785 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6786 | /// |
6787 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph) |
6788 | #[inline ] |
6789 | #[target_feature (enable = "avx512fp16" )] |
6790 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6791 | #[rustc_legacy_const_generics (4)] |
6792 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6793 | pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>( |
6794 | a: __m512h, |
6795 | k: __mmask32, |
6796 | b: __m512h, |
6797 | c: __m512h, |
6798 | ) -> __m512h { |
6799 | unsafe { |
6800 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
6802 | } |
6803 | } |
6804 | |
6805 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6806 | /// in c from the negated intermediate result, and store the results in dst using writemask k (the element is |
6807 | /// copied from c when the corresponding mask bit is not set). |
6808 | /// |
6809 | /// Rounding is done according to the rounding parameter, which can be one of: |
6810 | /// |
6811 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6812 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6813 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6814 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6815 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6816 | /// |
6817 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph) |
6818 | #[inline ] |
6819 | #[target_feature (enable = "avx512fp16" )] |
6820 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6821 | #[rustc_legacy_const_generics (4)] |
6822 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6823 | pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>( |
6824 | a: __m512h, |
6825 | b: __m512h, |
6826 | c: __m512h, |
6827 | k: __mmask32, |
6828 | ) -> __m512h { |
6829 | unsafe { |
6830 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
6832 | } |
6833 | } |
6834 | |
6835 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements |
6836 | /// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is |
6837 | /// zeroed out when the corresponding mask bit is not set). |
6838 | /// |
6839 | /// Rounding is done according to the rounding parameter, which can be one of: |
6840 | /// |
6841 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6842 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6843 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6844 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6845 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6846 | /// |
6847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph) |
6848 | #[inline ] |
6849 | #[target_feature (enable = "avx512fp16" )] |
6850 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6851 | #[rustc_legacy_const_generics (4)] |
6852 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6853 | pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>( |
6854 | k: __mmask32, |
6855 | a: __m512h, |
6856 | b: __m512h, |
6857 | c: __m512h, |
6858 | ) -> __m512h { |
6859 | unsafe { |
6860 | static_assert_rounding!(ROUNDING); |
6861 | simd_select_bitmask( |
            k,
            _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
6865 | ) |
6866 | } |
6867 | } |
6868 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst, and copy
/// the upper 7 packed elements from a to the upper elements of dst.
6872 | /// |
6873 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh) |
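///
/// # Examples
///
/// A sketch of the lower-lane computation `-(a[0] * b[0]) - c[0]` (illustrative only, not part of
/// the original source; assumes `avx512fp16`):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm_set_sh(2.0);
///     let b = _mm_set_sh(3.0);
///     let c = _mm_set_sh(1.0);
///     // Lower lane becomes -(2.0 * 3.0) - 1.0 = -7.0; the upper 7 lanes are copied from `a`.
///     _mm_fnmsub_sh(a, b, c)
/// }
/// ```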
6874 | #[inline ] |
6875 | #[target_feature (enable = "avx512fp16" )] |
6876 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6877 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6878 | pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6879 | unsafe { |
6880 | let extracta: f16 = simd_extract!(a, 0); |
6881 | let extractb: f16 = simd_extract!(b, 0); |
6882 | let extractc: f16 = simd_extract!(c, 0); |
        let r: f16 = fmaf16(-extracta, extractb, -extractc);
6884 | simd_insert!(a, 0, r) |
6885 | } |
6886 | } |
6887 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed elements
/// from a to the upper elements of dst.
6892 | /// |
6893 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh) |
6894 | #[inline ] |
6895 | #[target_feature (enable = "avx512fp16" )] |
6896 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6897 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6898 | pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
6899 | unsafe { |
6900 | let mut fnmsub: f16 = simd_extract!(a, 0); |
6901 | if k & 1 != 0 { |
6902 | let extractb: f16 = simd_extract!(b, 0); |
6903 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = fmaf16(-fnmsub, extractb, -extractc);
6905 | } |
6906 | simd_insert!(a, 0, fnmsub) |
6907 | } |
6908 | } |
6909 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed
/// elements from c to the upper elements of dst.
6914 | /// |
6915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh) |
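///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// It highlights that the pass-through value and the upper lanes both come from c:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let merged = _mm_mask3_fnmsub_sh(a, b, c, 0b1);
/// // Lane 0 == -(2.0 * 3.0) - 1.0 == -7.0; lanes 1..=7 == 1.0 (from c).
/// let passthrough = _mm_mask3_fnmsub_sh(a, b, c, 0b0);
/// // Every lane == 1.0: with mask bit 0 clear, lane 0 is also taken from c.
/// ```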
6916 | #[inline ] |
6917 | #[target_feature (enable = "avx512fp16" )] |
6918 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6920 | pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
6921 | unsafe { |
6922 | let mut fnmsub: f16 = simd_extract!(c, 0); |
6923 | if k & 1 != 0 { |
6924 | let extracta: f16 = simd_extract!(a, 0); |
6925 | let extractb: f16 = simd_extract!(b, 0); |
fnmsub = fmaf16(-extracta, extractb, -fnmsub);
6927 | } |
6928 | simd_insert!(c, 0, fnmsub) |
6929 | } |
6930 | } |
6931 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements
/// from a to the upper elements of dst.
6936 | /// |
6937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh) |
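///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// Even when mask bit 0 is clear, the upper lanes still come from a; only lane 0 is zeroed:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let r = _mm_maskz_fnmsub_sh(0b0, a, b, c);
/// // Lane 0 == 0.0; lanes 1..=7 == 2.0 (from a).
/// ```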
6938 | #[inline ] |
6939 | #[target_feature (enable = "avx512fp16" )] |
6940 | #[cfg_attr (test, assert_instr(vfnmsub))] |
6941 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6942 | pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6943 | unsafe { |
6944 | let mut fnmsub: f16 = 0.0; |
6945 | if k & 1 != 0 { |
6946 | let extracta: f16 = simd_extract!(a, 0); |
6947 | let extractb: f16 = simd_extract!(b, 0); |
6948 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = fmaf16(-extracta, extractb, -extractc);
6950 | } |
6951 | simd_insert!(a, 0, fnmsub) |
6952 | } |
6953 | } |
6954 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst, and copy
/// the upper 7 packed elements from a to the upper elements of dst.
6958 | /// |
6959 | /// Rounding is done according to the rounding parameter, which can be one of: |
6960 | /// |
6961 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6962 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6963 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6964 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6965 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6966 | /// |
6967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh) |
6968 | #[inline ] |
6969 | #[target_feature (enable = "avx512fp16" )] |
6970 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
6971 | #[rustc_legacy_const_generics (3)] |
6972 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
6973 | pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
6974 | unsafe { |
6975 | static_assert_rounding!(ROUNDING); |
6976 | let extracta: f16 = simd_extract!(a, 0); |
6977 | let extractb: f16 = simd_extract!(b, 0); |
6978 | let extractc: f16 = simd_extract!(c, 0); |
let r: f16 = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
6980 | simd_insert!(a, 0, r) |
6981 | } |
6982 | } |
6983 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 7 packed
/// elements from a to the upper elements of dst.
6988 | /// |
6989 | /// Rounding is done according to the rounding parameter, which can be one of: |
6990 | /// |
6991 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
6992 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
6993 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
6994 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
6995 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
6996 | /// |
6997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh) |
6998 | #[inline ] |
6999 | #[target_feature (enable = "avx512fp16" )] |
7000 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7001 | #[rustc_legacy_const_generics (4)] |
7002 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7003 | pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>( |
7004 | a: __m128h, |
7005 | k: __mmask8, |
7006 | b: __m128h, |
7007 | c: __m128h, |
7008 | ) -> __m128h { |
7009 | unsafe { |
7010 | static_assert_rounding!(ROUNDING); |
7011 | let mut fnmsub: f16 = simd_extract!(a, 0); |
7012 | if k & 1 != 0 { |
7013 | let extractb: f16 = simd_extract!(b, 0); |
7014 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
7016 | } |
7017 | simd_insert!(a, 0, fnmsub) |
7018 | } |
7019 | } |
7020 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 7 packed
/// elements from c to the upper elements of dst.
7025 | /// |
7026 | /// Rounding is done according to the rounding parameter, which can be one of: |
7027 | /// |
7028 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7029 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7030 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7031 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7032 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7033 | /// |
7034 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh) |
7035 | #[inline ] |
7036 | #[target_feature (enable = "avx512fp16" )] |
7037 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7038 | #[rustc_legacy_const_generics (4)] |
7039 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7040 | pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>( |
7041 | a: __m128h, |
7042 | b: __m128h, |
7043 | c: __m128h, |
7044 | k: __mmask8, |
7045 | ) -> __m128h { |
7046 | unsafe { |
7047 | static_assert_rounding!(ROUNDING); |
7048 | let mut fnmsub: f16 = simd_extract!(c, 0); |
7049 | if k & 1 != 0 { |
7050 | let extracta: f16 = simd_extract!(a, 0); |
7051 | let extractb: f16 = simd_extract!(b, 0); |
fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
7053 | } |
7054 | simd_insert!(c, 0, fnmsub) |
7055 | } |
7056 | } |
7057 | |
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the negated intermediate result. Store the result in the lower element of dst using
/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements
/// from a to the upper elements of dst.
7062 | /// |
7063 | /// Rounding is done according to the rounding parameter, which can be one of: |
7064 | /// |
7065 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7066 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7067 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7068 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7069 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7070 | /// |
7071 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh) |
7072 | #[inline ] |
7073 | #[target_feature (enable = "avx512fp16" )] |
7074 | #[cfg_attr (test, assert_instr(vfnmsub, ROUNDING = 8))] |
7075 | #[rustc_legacy_const_generics (4)] |
7076 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7077 | pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>( |
7078 | k: __mmask8, |
7079 | a: __m128h, |
7080 | b: __m128h, |
7081 | c: __m128h, |
7082 | ) -> __m128h { |
7083 | unsafe { |
7084 | static_assert_rounding!(ROUNDING); |
7085 | let mut fnmsub: f16 = 0.0; |
7086 | if k & 1 != 0 { |
7087 | let extracta: f16 = simd_extract!(a, 0); |
7088 | let extractb: f16 = simd_extract!(b, 0); |
7089 | let extractc: f16 = simd_extract!(c, 0); |
fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
7091 | } |
7092 | simd_insert!(a, 0, fnmsub) |
7093 | } |
7094 | } |
7095 | |
7096 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7097 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7098 | /// |
7099 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph) |
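///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// Even-indexed lanes compute a*b - c and odd-indexed lanes compute a*b + c:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let r = _mm_fmaddsub_ph(a, b, c);
/// // Even lanes == 2.0 * 3.0 - 1.0 == 5.0; odd lanes == 2.0 * 3.0 + 1.0 == 7.0.
/// ```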
7100 | #[inline ] |
7101 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7102 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7103 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7104 | pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
7105 | unsafe { vfmaddsubph_128(a, b, c) } |
7106 | } |
7107 | |
7108 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7109 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7110 | /// (the element is copied from a when the corresponding mask bit is not set). |
7111 | /// |
7112 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph) |
7113 | #[inline ] |
7114 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7115 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7116 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7117 | pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
7119 | } |
7120 | |
7121 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7122 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7123 | /// (the element is copied from c when the corresponding mask bit is not set). |
7124 | /// |
7125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph) |
7126 | #[inline ] |
7127 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7128 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7129 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7130 | pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
7132 | } |
7133 | |
7134 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7135 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7136 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7137 | /// |
7138 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph) |
7139 | #[inline ] |
7140 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7141 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7142 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7143 | pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
7145 | } |
7146 | |
7147 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7148 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7149 | /// |
7150 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph) |
7151 | #[inline ] |
7152 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7153 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7154 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7155 | pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
7156 | unsafe { vfmaddsubph_256(a, b, c) } |
7157 | } |
7158 | |
7159 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7160 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7161 | /// (the element is copied from a when the corresponding mask bit is not set). |
7162 | /// |
7163 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph) |
7164 | #[inline ] |
7165 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7166 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7167 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7168 | pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
7170 | } |
7171 | |
7172 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7173 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7174 | /// (the element is copied from c when the corresponding mask bit is not set). |
7175 | /// |
7176 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph) |
7177 | #[inline ] |
7178 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7179 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7180 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7181 | pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
7183 | } |
7184 | |
7185 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7186 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7187 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7188 | /// |
7189 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph) |
7190 | #[inline ] |
7191 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7192 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7193 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7194 | pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
7196 | } |
7197 | |
7198 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7199 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7200 | /// |
7201 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph) |
7202 | #[inline ] |
7203 | #[target_feature (enable = "avx512fp16" )] |
7204 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7205 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7206 | pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7207 | _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
7208 | } |
7209 | |
7210 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7211 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7212 | /// (the element is copied from a when the corresponding mask bit is not set). |
7213 | /// |
7214 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph) |
7215 | #[inline ] |
7216 | #[target_feature (enable = "avx512fp16" )] |
7217 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7218 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7219 | pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
7221 | } |
7222 | |
7223 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7224 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7225 | /// (the element is copied from c when the corresponding mask bit is not set). |
7226 | /// |
7227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph) |
7228 | #[inline ] |
7229 | #[target_feature (enable = "avx512fp16" )] |
7230 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7231 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7232 | pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
7234 | } |
7235 | |
7236 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7237 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7238 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7239 | /// |
7240 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph) |
7241 | #[inline ] |
7242 | #[target_feature (enable = "avx512fp16" )] |
7243 | #[cfg_attr (test, assert_instr(vfmaddsub))] |
7244 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7245 | pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
7247 | } |
7248 | |
7249 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7250 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst. |
7251 | /// |
7252 | /// Rounding is done according to the rounding parameter, which can be one of: |
7253 | /// |
7254 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7255 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7256 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7257 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7258 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7259 | /// |
7260 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph) |
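///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime
/// and uses the `_mm512_set1_ph` splat from this module:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(1.0);
/// let r = _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// // Even lanes == 2.0 * 3.0 - 1.0 == 5.0; odd lanes == 2.0 * 3.0 + 1.0 == 7.0.
/// ```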
7261 | #[inline ] |
7262 | #[target_feature (enable = "avx512fp16" )] |
7263 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7264 | #[rustc_legacy_const_generics (3)] |
7265 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7266 | pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>( |
7267 | a: __m512h, |
7268 | b: __m512h, |
7269 | c: __m512h, |
7270 | ) -> __m512h { |
7271 | unsafe { |
7272 | static_assert_rounding!(ROUNDING); |
7273 | vfmaddsubph_512(a, b, c, ROUNDING) |
7274 | } |
7275 | } |
7276 | |
7277 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7278 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7279 | /// (the element is copied from a when the corresponding mask bit is not set). |
7280 | /// |
7281 | /// Rounding is done according to the rounding parameter, which can be one of: |
7282 | /// |
7283 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7284 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7285 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7286 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7287 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7288 | /// |
7289 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph) |
7290 | #[inline ] |
7291 | #[target_feature (enable = "avx512fp16" )] |
7292 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7293 | #[rustc_legacy_const_generics (4)] |
7294 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7295 | pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>( |
7296 | a: __m512h, |
7297 | k: __mmask32, |
7298 | b: __m512h, |
7299 | c: __m512h, |
7300 | ) -> __m512h { |
7301 | unsafe { |
7302 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
7304 | } |
7305 | } |
7306 | |
7307 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7308 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7309 | /// (the element is copied from c when the corresponding mask bit is not set). |
7310 | /// |
7311 | /// Rounding is done according to the rounding parameter, which can be one of: |
7312 | /// |
7313 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7314 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7315 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7316 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7317 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7318 | /// |
7319 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph) |
7320 | #[inline ] |
7321 | #[target_feature (enable = "avx512fp16" )] |
7322 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7323 | #[rustc_legacy_const_generics (4)] |
7324 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7325 | pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>( |
7326 | a: __m512h, |
7327 | b: __m512h, |
7328 | c: __m512h, |
7329 | k: __mmask32, |
7330 | ) -> __m512h { |
7331 | unsafe { |
7332 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
7334 | } |
7335 | } |
7336 | |
7337 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and |
7338 | /// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7339 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7340 | /// |
7341 | /// Rounding is done according to the rounding parameter, which can be one of: |
7342 | /// |
7343 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7344 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7345 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7346 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7347 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7348 | /// |
7349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph) |
7350 | #[inline ] |
7351 | #[target_feature (enable = "avx512fp16" )] |
7352 | #[cfg_attr (test, assert_instr(vfmaddsub, ROUNDING = 8))] |
7353 | #[rustc_legacy_const_generics (4)] |
7354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7355 | pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>( |
7356 | k: __mmask32, |
7357 | a: __m512h, |
7358 | b: __m512h, |
7359 | c: __m512h, |
7360 | ) -> __m512h { |
7361 | unsafe { |
7362 | static_assert_rounding!(ROUNDING); |
7363 | simd_select_bitmask( |
k,
_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
_mm512_setzero_ph(),
7367 | ) |
7368 | } |
7369 | } |
7370 | |
7371 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7372 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7373 | /// |
7374 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph) |
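///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// The alternation is the mirror image of `_mm_fmaddsub_ph`: even-indexed lanes compute a*b + c and
/// odd-indexed lanes compute a*b - c:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// let r = _mm_fmsubadd_ph(a, b, c);
/// // Even lanes == 2.0 * 3.0 + 1.0 == 7.0; odd lanes == 2.0 * 3.0 - 1.0 == 5.0.
/// ```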
7375 | #[inline ] |
7376 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7377 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7378 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7379 | pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { vfmaddsubph_128(a, b, simd_neg(c)) }
7381 | } |
7382 | |
7383 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7384 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7385 | /// (the element is copied from a when the corresponding mask bit is not set). |
7386 | /// |
7387 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph) |
7388 | #[inline ] |
7389 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7390 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7391 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7392 | pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
7394 | } |
7395 | |
7396 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7397 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7398 | /// (the element is copied from c when the corresponding mask bit is not set). |
7399 | /// |
7400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph) |
7401 | #[inline ] |
7402 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7403 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7404 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7405 | pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
7407 | } |
7408 | |
7409 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7410 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7411 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7412 | /// |
7413 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph) |
7414 | #[inline ] |
7415 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7416 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7417 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7418 | pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { |
unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
7420 | } |
7421 | |
7422 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7423 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7424 | /// |
7425 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph) |
7426 | #[inline ] |
7427 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7428 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7429 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7430 | pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { vfmaddsubph_256(a, b, simd_neg(c)) }
7432 | } |
7433 | |
7434 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7435 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7436 | /// (the element is copied from a when the corresponding mask bit is not set). |
7437 | /// |
7438 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph) |
7439 | #[inline ] |
7440 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7441 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7442 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7443 | pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
7445 | } |
7446 | |
7447 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7448 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7449 | /// (the element is copied from c when the corresponding mask bit is not set). |
7450 | /// |
7451 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph) |
7452 | #[inline ] |
7453 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7454 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7455 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7456 | pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
7458 | } |
7459 | |
7460 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7461 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7462 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7463 | /// |
7464 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph) |
7465 | #[inline ] |
7466 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7467 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7468 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7469 | pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { |
unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
7471 | } |
7472 | |
7473 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7474 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7475 | /// |
7476 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph) |
7477 | #[inline ] |
7478 | #[target_feature (enable = "avx512fp16" )] |
7479 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7480 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7481 | pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
7482 | _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) |
7483 | } |
7484 | |
7485 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7486 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7487 | /// (the element is copied from a when the corresponding mask bit is not set). |
7488 | /// |
7489 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph) |
7490 | #[inline ] |
7491 | #[target_feature (enable = "avx512fp16" )] |
7492 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7493 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7494 | pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
7496 | } |
7497 | |
7498 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7499 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7500 | /// (the element is copied from c when the corresponding mask bit is not set). |
7501 | /// |
7502 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph) |
7503 | #[inline ] |
7504 | #[target_feature (enable = "avx512fp16" )] |
7505 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7506 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7507 | pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
7509 | } |
7510 | |
7511 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7512 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7513 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7514 | /// |
7515 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph) |
7516 | #[inline ] |
7517 | #[target_feature (enable = "avx512fp16" )] |
7518 | #[cfg_attr (test, assert_instr(vfmsubadd))] |
7519 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7520 | pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { |
unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
7522 | } |
7523 | |
7524 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7525 | /// and add packed elements in c to/from the intermediate result, and store the results in dst. |
7526 | /// |
7527 | /// Rounding is done according to the rounding parameter, which can be one of: |
7528 | /// |
7529 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7530 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7531 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7532 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7533 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7534 | /// |
7535 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph) |
7536 | #[inline ] |
7537 | #[target_feature (enable = "avx512fp16" )] |
7538 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7539 | #[rustc_legacy_const_generics (3)] |
7540 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7541 | pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>( |
7542 | a: __m512h, |
7543 | b: __m512h, |
7544 | c: __m512h, |
7545 | ) -> __m512h { |
7546 | unsafe { |
7547 | static_assert_rounding!(ROUNDING); |
vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
7549 | } |
7550 | } |
7551 | |
7552 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7553 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7554 | /// (the element is copied from a when the corresponding mask bit is not set). |
7555 | /// |
7556 | /// Rounding is done according to the rounding parameter, which can be one of: |
7557 | /// |
7558 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7559 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7560 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7561 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7562 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7563 | /// |
7564 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph) |
7565 | #[inline ] |
7566 | #[target_feature (enable = "avx512fp16" )] |
7567 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7568 | #[rustc_legacy_const_generics (4)] |
7569 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7570 | pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>( |
7571 | a: __m512h, |
7572 | k: __mmask32, |
7573 | b: __m512h, |
7574 | c: __m512h, |
7575 | ) -> __m512h { |
7576 | unsafe { |
7577 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
7579 | } |
7580 | } |
7581 | |
7582 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7583 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k |
7584 | /// (the element is copied from c when the corresponding mask bit is not set). |
7585 | /// |
7586 | /// Rounding is done according to the rounding parameter, which can be one of: |
7587 | /// |
7588 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7589 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7590 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7591 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7592 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7593 | /// |
7594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph) |
7595 | #[inline ] |
7596 | #[target_feature (enable = "avx512fp16" )] |
7597 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7598 | #[rustc_legacy_const_generics (4)] |
7599 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7600 | pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>( |
7601 | a: __m512h, |
7602 | b: __m512h, |
7603 | c: __m512h, |
7604 | k: __mmask32, |
7605 | ) -> __m512h { |
7606 | unsafe { |
7607 | static_assert_rounding!(ROUNDING); |
simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
7609 | } |
7610 | } |
7611 | |
7612 | /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract |
7613 | /// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k |
7614 | /// (the element is zeroed out when the corresponding mask bit is not set). |
7615 | /// |
7616 | /// Rounding is done according to the rounding parameter, which can be one of: |
7617 | /// |
7618 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
7619 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
7620 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
7621 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
7622 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
7623 | /// |
7624 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph) |
7625 | #[inline ] |
7626 | #[target_feature (enable = "avx512fp16" )] |
7627 | #[cfg_attr (test, assert_instr(vfmsubadd, ROUNDING = 8))] |
7628 | #[rustc_legacy_const_generics (4)] |
7629 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7630 | pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>( |
7631 | k: __mmask32, |
7632 | a: __m512h, |
7633 | b: __m512h, |
7634 | c: __m512h, |
7635 | ) -> __m512h { |
7636 | unsafe { |
7637 | static_assert_rounding!(ROUNDING); |
7638 | simd_select_bitmask( |
k,
_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
_mm512_setzero_ph(),
7642 | ) |
7643 | } |
7644 | } |
7645 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7647 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7648 | /// |
7649 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph) |
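///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(4.0);
/// let r = _mm_rcp_ph(a);
/// // Every lane is approximately 0.25 (relative error below 1.5*2^-12).
/// ```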
7650 | #[inline ] |
7651 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7652 | #[cfg_attr (test, assert_instr(vrcpph))] |
7653 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7654 | pub fn _mm_rcp_ph(a: __m128h) -> __m128h { |
_mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
7656 | } |
7657 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7660 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7661 | /// |
7662 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph) |
7663 | #[inline ] |
7664 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7665 | #[cfg_attr (test, assert_instr(vrcpph))] |
7666 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7667 | pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7668 | unsafe { vrcpph_128(a, src, k) } |
7669 | } |
7670 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7673 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7674 | /// |
7675 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph) |
7676 | #[inline ] |
7677 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7678 | #[cfg_attr (test, assert_instr(vrcpph))] |
7679 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7680 | pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
7682 | } |
7683 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7685 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7686 | /// |
7687 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph) |
7688 | #[inline ] |
7689 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7690 | #[cfg_attr (test, assert_instr(vrcpph))] |
7691 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7692 | pub fn _mm256_rcp_ph(a: __m256h) -> __m256h { |
_mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
7694 | } |
7695 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7698 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7699 | /// |
7700 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph) |
7701 | #[inline ] |
7702 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7703 | #[cfg_attr (test, assert_instr(vrcpph))] |
7704 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7705 | pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
7706 | unsafe { vrcpph_256(a, src, k) } |
7707 | } |
7708 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7711 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7712 | /// |
7713 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph) |
7714 | #[inline ] |
7715 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7716 | #[cfg_attr (test, assert_instr(vrcpph))] |
7717 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7718 | pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h { |
_mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
7720 | } |
7721 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7723 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7724 | /// |
7725 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph) |
7726 | #[inline ] |
7727 | #[target_feature (enable = "avx512fp16" )] |
7728 | #[cfg_attr (test, assert_instr(vrcpph))] |
7729 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7730 | pub fn _mm512_rcp_ph(a: __m512h) -> __m512h { |
_mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
7732 | } |
7733 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7736 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7737 | /// |
7738 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph) |
7739 | #[inline ] |
7740 | #[target_feature (enable = "avx512fp16" )] |
7741 | #[cfg_attr (test, assert_instr(vrcpph))] |
7742 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7743 | pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
7744 | unsafe { vrcpph_512(a, src, k) } |
7745 | } |
7746 | |
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7749 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7750 | /// |
7751 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph) |
7752 | #[inline ] |
7753 | #[target_feature (enable = "avx512fp16" )] |
7754 | #[cfg_attr (test, assert_instr(vrcpph))] |
7755 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7756 | pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h { |
_mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
7758 | } |
7759 | |
7760 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7761 | /// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the |
7762 | /// upper elements of dst. |
7763 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7764 | /// |
7765 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh) |
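///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime.
/// Only the lower lane of b is inverted; the upper lanes are copied from a:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(9.0);
/// let b = _mm_set1_ph(4.0);
/// let r = _mm_rcp_sh(a, b);
/// // Lane 0 is approximately 0.25; lanes 1..=7 == 9.0 (from a).
/// ```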
7766 | #[inline ] |
7767 | #[target_feature (enable = "avx512fp16" )] |
7768 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7769 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7770 | pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_rcp_sh(_mm_undefined_ph(), 0xff, a, b)
7772 | } |
7773 | |
7774 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7775 | /// store the result in the lower element of dst using writemask k (the element is copied from src when |
7776 | /// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7777 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7778 | /// |
7779 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh) |
7780 | #[inline ] |
7781 | #[target_feature (enable = "avx512fp16" )] |
7782 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7783 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7784 | pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7785 | unsafe { vrcpsh(a, b, src, k) } |
7786 | } |
7787 | |
7788 | /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, |
7789 | /// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
7790 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7791 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7792 | /// |
7793 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh) |
7794 | #[inline ] |
7795 | #[target_feature (enable = "avx512fp16" )] |
7796 | #[cfg_attr (test, assert_instr(vrcpsh))] |
7797 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7798 | pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
_mm_mask_rcp_sh(_mm_setzero_ph(), k, a, b)
7800 | } |
7801 | |
7802 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7803 | /// elements in a, and store the results in dst. |
7804 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7805 | /// |
7806 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph) |
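///
/// A minimal usage sketch, not taken from Intel's guide; it assumes avx512fp16 support at runtime:
///
/// ```ignore
/// // Assumes the caller has verified avx512fp16 support.
/// let a = _mm_set1_ph(16.0);
/// let r = _mm_rsqrt_ph(a);
/// // Every lane is approximately 0.25 (relative error below 1.5*2^-12).
/// ```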
7807 | #[inline ] |
7808 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7809 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7810 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7811 | pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h { |
_mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
7813 | } |
7814 | |
7815 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7816 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7817 | /// the corresponding mask bit is not set). |
7818 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7819 | /// |
7820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph) |
7821 | #[inline ] |
7822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7823 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7825 | pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7826 | unsafe { vrsqrtph_128(a, src, k) } |
7827 | } |
7828 | |
7829 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7830 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7831 | /// corresponding mask bit is not set). |
7832 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7833 | /// |
7834 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph) |
7835 | #[inline ] |
7836 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7837 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7838 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7839 | pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h { |
_mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
7841 | } |
7842 | |
7843 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7844 | /// elements in a, and store the results in dst. |
7845 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7846 | /// |
7847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph) |
7848 | #[inline ] |
7849 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7850 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7851 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7852 | pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h { |
7853 | _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a) |
7854 | } |
7855 | |
7856 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7857 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7858 | /// the corresponding mask bit is not set). |
7859 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7860 | /// |
7861 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph) |
7862 | #[inline ] |
7863 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7864 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7865 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7866 | pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
7867 | unsafe { vrsqrtph_256(a, src, k) } |
7868 | } |
7869 | |
7870 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7871 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7872 | /// corresponding mask bit is not set). |
7873 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7874 | /// |
7875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph) |
7876 | #[inline ] |
7877 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7878 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7880 | pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h { |
7881 | _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a) |
7882 | } |
7883 | |
7884 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7885 | /// elements in a, and store the results in dst. |
7886 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7887 | /// |
7888 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph) |
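/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let r = _mm512_rsqrt_ph(a); // all 32 lanes ≈ 0.7071
/// ```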
7889 | #[inline ] |
7890 | #[target_feature (enable = "avx512fp16" )] |
7891 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7892 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7893 | pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { |
7894 | _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a) |
7895 | } |
7896 | |
7897 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7898 | /// elements in a, and store the results in dst using writemask k (elements are copied from src when |
7899 | /// the corresponding mask bit is not set). |
7900 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7901 | /// |
7902 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph) |
7903 | #[inline ] |
7904 | #[target_feature (enable = "avx512fp16" )] |
7905 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7907 | pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
7908 | unsafe { vrsqrtph_512(a, src, k) } |
7909 | } |
7910 | |
7911 | /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point |
7912 | /// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the |
7913 | /// corresponding mask bit is not set). |
7914 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7915 | /// |
7916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph) |
7917 | #[inline ] |
7918 | #[target_feature (enable = "avx512fp16" )] |
7919 | #[cfg_attr (test, assert_instr(vrsqrtph))] |
7920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7921 | pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { |
7922 | _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a) |
7923 | } |
7924 | |
7925 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7926 | /// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a |
7927 | /// to the upper elements of dst. |
7928 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7929 | /// |
7930 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh) |
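/// # Example
///
/// A minimal sketch (illustrative values; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm_set1_ph(3.0);
/// let b = _mm_set_sh(25.0);
/// let r = _mm_rsqrt_sh(a, b); // lane 0 ≈ 0.2; lanes 1..=7 come from `a`, i.e. 3.0
/// ```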
7931 | #[inline ] |
7932 | #[target_feature (enable = "avx512fp16" )] |
7933 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7934 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7935 | pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { |
7936 | _mm_mask_rsqrt_sh(_mm_undefined_ph(), 0xff, a, b) |
7937 | } |
7938 | |
7939 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7940 | /// element in b, store the result in the lower element of dst using writemask k (the element is copied from src |
7941 | /// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7942 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7943 | /// |
7944 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh) |
7945 | #[inline ] |
7946 | #[target_feature (enable = "avx512fp16" )] |
7947 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7949 | pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7950 | unsafe { vrsqrtsh(a, b, src, k) } |
7951 | } |
7952 | |
7953 | /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point |
7954 | /// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when |
7955 | /// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
7956 | /// The maximum relative error for this approximation is less than `1.5*2^-12`. |
7957 | /// |
7958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh) |
7959 | #[inline ] |
7960 | #[target_feature (enable = "avx512fp16" )] |
7961 | #[cfg_attr (test, assert_instr(vrsqrtsh))] |
7962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7963 | pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
7964 | _mm_mask_rsqrt_sh(_mm_setzero_ph(), k, a, b) |
7965 | } |
7966 | |
7967 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7968 | /// results in dst. |
7969 | /// |
7970 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph) |
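/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set_ph(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0);
/// let r = _mm_sqrt_ph(a); // lanes 0..=7 hold 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
/// ```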
7971 | #[inline ] |
7972 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7973 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7974 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7975 | pub fn _mm_sqrt_ph(a: __m128h) -> __m128h { |
7976 | unsafe { simd_fsqrt(a) } |
7977 | } |
7978 | |
7979 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7980 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
7981 | /// |
7982 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph) |
7983 | #[inline ] |
7984 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7985 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7986 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7987 | pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
7988 | unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) } |
7989 | } |
7990 | |
7991 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
7992 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
7993 | /// |
7994 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph) |
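/// # Example
///
/// An illustrative sketch of the zeromask behaviour (assumed feature set as above):
///
/// ```ignore
/// let a = _mm_set1_ph(9.0);
/// let r = _mm_maskz_sqrt_ph(0b1111_0000, a);
/// // lanes 4..=7 hold 3.0; lanes 0..=3 are zeroed because their mask bits are clear
/// ```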
7995 | #[inline ] |
7996 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
7997 | #[cfg_attr (test, assert_instr(vsqrtph))] |
7998 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
7999 | pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h { |
8000 | unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) } |
8001 | } |
8002 | |
8003 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8004 | /// results in dst. |
8005 | /// |
8006 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph) |
8007 | #[inline ] |
8008 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8009 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8010 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8011 | pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h { |
8012 | unsafe { simd_fsqrt(a) } |
8013 | } |
8014 | |
8015 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8016 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8017 | /// |
8018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph) |
8019 | #[inline ] |
8020 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8021 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8023 | pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
8024 | unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) } |
8025 | } |
8026 | |
8027 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8028 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8029 | /// |
8030 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph) |
8031 | #[inline ] |
8032 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8033 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8034 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8035 | pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { |
8036 | unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) } |
8037 | } |
8038 | |
8039 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8040 | /// results in dst. |
8041 | /// |
8042 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph) |
8043 | #[inline ] |
8044 | #[target_feature (enable = "avx512fp16" )] |
8045 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8046 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8047 | pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h { |
8048 | unsafe { simd_fsqrt(a) } |
8049 | } |
8050 | |
8051 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8052 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8053 | /// |
8054 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph) |
8055 | #[inline ] |
8056 | #[target_feature (enable = "avx512fp16" )] |
8057 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8058 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8059 | pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
8060 | unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) } |
8061 | } |
8062 | |
8063 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8064 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8065 | /// |
8066 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph) |
8067 | #[inline ] |
8068 | #[target_feature (enable = "avx512fp16" )] |
8069 | #[cfg_attr (test, assert_instr(vsqrtph))] |
8070 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8071 | pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { |
8072 | unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) } |
8073 | } |
8074 | |
8075 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8076 | /// results in dst. |
8077 | /// Rounding is done according to the rounding parameter, which can be one of: |
8078 | /// |
8079 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8080 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8081 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8082 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8083 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8084 | /// |
8085 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph) |
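/// # Example
///
/// A minimal sketch showing how a rounding mode is passed as a const generic
/// (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// // every lane ≈ 1.4142
/// ```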
8086 | #[inline ] |
8087 | #[target_feature (enable = "avx512fp16" )] |
8088 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8089 | #[rustc_legacy_const_generics (1)] |
8090 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8091 | pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h { |
8092 | unsafe { |
8093 | static_assert_rounding!(ROUNDING); |
8094 | vsqrtph_512(a, ROUNDING) |
8095 | } |
8096 | } |
8097 | |
8098 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8099 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8100 | /// Rounding is done according to the rounding parameter, which can be one of: |
8101 | /// |
8102 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8103 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8104 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8105 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8106 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8107 | /// |
8108 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph) |
8109 | #[inline ] |
8110 | #[target_feature (enable = "avx512fp16" )] |
8111 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8112 | #[rustc_legacy_const_generics (3)] |
8113 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8114 | pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>( |
8115 | src: __m512h, |
8116 | k: __mmask32, |
8117 | a: __m512h, |
8118 | ) -> __m512h { |
8119 | unsafe { |
8120 | static_assert_rounding!(ROUNDING); |
8121 | simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src) |
8122 | } |
8123 | } |
8124 | |
8125 | /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the |
8126 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8127 | /// Rounding is done according to the rounding parameter, which can be one of: |
8128 | /// |
8129 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8130 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8131 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8132 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8133 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8134 | /// |
8135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph) |
8136 | #[inline ] |
8137 | #[target_feature (enable = "avx512fp16" )] |
8138 | #[cfg_attr (test, assert_instr(vsqrtph, ROUNDING = 8))] |
8139 | #[rustc_legacy_const_generics (2)] |
8140 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8141 | pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h { |
8142 | unsafe { |
8143 | static_assert_rounding!(ROUNDING); |
8144 | simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph()) |
8145 | } |
8146 | } |
8147 | |
8148 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8149 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
8150 | /// elements of dst. |
8151 | /// |
8152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh) |
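/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(9.0);
/// let r = _mm_sqrt_sh(a, b); // lane 0 = 3.0; lanes 1..=7 come from `a`, i.e. 1.0
/// ```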
8153 | #[inline ] |
8154 | #[target_feature (enable = "avx512fp16" )] |
8155 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8157 | pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h { |
8158 | _mm_mask_sqrt_sh(_mm_undefined_ph(), 0xff, a, b) |
8159 | } |
8160 | |
8161 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8162 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask |
8163 | /// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8164 | /// |
8165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh) |
8166 | #[inline ] |
8167 | #[target_feature (enable = "avx512fp16" )] |
8168 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8169 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8170 | pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8171 | _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8172 | } |
8173 | |
8174 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8175 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
8176 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8177 | /// |
8178 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh) |
8179 | #[inline ] |
8180 | #[target_feature (enable = "avx512fp16" )] |
8181 | #[cfg_attr (test, assert_instr(vsqrtsh))] |
8182 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8183 | pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8184 | _mm_mask_sqrt_sh(_mm_setzero_ph(), k, a, b) |
8185 | } |
8186 | |
8187 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8188 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
8189 | /// elements of dst. |
8190 | /// Rounding is done according to the rounding parameter, which can be one of: |
8191 | /// |
8192 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8193 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8194 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8195 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8196 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8197 | /// |
8198 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh) |
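/// # Example
///
/// A minimal sketch with an explicit rounding mode (illustrative; assumes `avx512fp16`):
///
/// ```ignore
/// let a = _mm_setzero_ph();
/// let b = _mm_set_sh(16.0);
/// let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
/// // lane 0 = 4.0; lanes 1..=7 come from `a`, i.e. 0.0
/// ```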
8199 | #[inline ] |
8200 | #[target_feature (enable = "avx512fp16" )] |
8201 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8202 | #[rustc_legacy_const_generics (2)] |
8203 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8204 | pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
8205 | static_assert_rounding!(ROUNDING); |
8206 | _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
8207 | } |
8208 | |
8209 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8210 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask |
8211 | /// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8212 | /// Rounding is done according to the rounding parameter, which can be one of: |
8213 | /// |
8214 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8215 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8216 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8217 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8218 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8219 | /// |
8220 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh) |
8221 | #[inline ] |
8222 | #[target_feature (enable = "avx512fp16" )] |
8223 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8224 | #[rustc_legacy_const_generics (4)] |
8225 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8226 | pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>( |
8227 | src: __m128h, |
8228 | k: __mmask8, |
8229 | a: __m128h, |
8230 | b: __m128h, |
8231 | ) -> __m128h { |
8232 | unsafe { |
8233 | static_assert_rounding!(ROUNDING); |
8234 | vsqrtsh(a, b, src, k, ROUNDING) |
8235 | } |
8236 | } |
8237 | |
8238 | /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store |
8239 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 |
8240 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
8241 | /// Rounding is done according to the rounding parameter, which can be one of: |
8242 | /// |
8243 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
8244 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
8245 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
8246 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
8247 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
8248 | /// |
8249 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh) |
8250 | #[inline ] |
8251 | #[target_feature (enable = "avx512fp16" )] |
8252 | #[cfg_attr (test, assert_instr(vsqrtsh, ROUNDING = 8))] |
8253 | #[rustc_legacy_const_generics (3)] |
8254 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8255 | pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>( |
8256 | k: __mmask8, |
8257 | a: __m128h, |
8258 | b: __m128h, |
8259 | ) -> __m128h { |
8260 | static_assert_rounding!(ROUNDING); |
8261 | _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
8262 | } |
8263 | |
8264 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8265 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8266 | /// value when inputs are NaN or signed-zero values. |
8267 | /// |
8268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph) |
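/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` and `avx512vl`; note the
/// non-IEEE handling of NaN and signed-zero inputs described above):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_max_ph(a, b); // every lane = 2.0
/// ```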
8269 | #[inline ] |
8270 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8271 | #[cfg_attr (test, assert_instr(vmaxph))] |
8272 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8273 | pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h { |
8274 | unsafe { vmaxph_128(a, b) } |
8275 | } |
8276 | |
8277 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8278 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8279 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8280 | /// NaN or signed-zero values. |
8281 | /// |
8282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph) |
8283 | #[inline ] |
8284 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8285 | #[cfg_attr (test, assert_instr(vmaxph))] |
8286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8287 | pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8288 | unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) } |
8289 | } |
8290 | |
8291 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8292 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8293 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8294 | /// NaN or signed-zero values. |
8295 | /// |
8296 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph) |
8297 | #[inline ] |
8298 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8299 | #[cfg_attr (test, assert_instr(vmaxph))] |
8300 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8301 | pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8302 | unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) } |
8303 | } |
8304 | |
8305 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8306 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8307 | /// value when inputs are NaN or signed-zero values. |
8308 | /// |
8309 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph) |
8310 | #[inline ] |
8311 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8312 | #[cfg_attr (test, assert_instr(vmaxph))] |
8313 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8314 | pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h { |
8315 | unsafe { vmaxph_256(a, b) } |
8316 | } |
8317 | |
8318 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8319 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8320 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8321 | /// NaN or signed-zero values. |
8322 | /// |
8323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph) |
8324 | #[inline ] |
8325 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8326 | #[cfg_attr (test, assert_instr(vmaxph))] |
8327 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8328 | pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8329 | unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) } |
8330 | } |
8331 | |
8332 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8333 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8334 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8335 | /// NaN or signed-zero values. |
8336 | /// |
8337 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph) |
8338 | #[inline ] |
8339 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8340 | #[cfg_attr (test, assert_instr(vmaxph))] |
8341 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8342 | pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8343 | unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) } |
8344 | } |
8345 | |
8346 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8347 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum |
8348 | /// value when inputs are NaN or signed-zero values. |
8349 | /// |
8350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph) |
8351 | #[inline ] |
8352 | #[target_feature (enable = "avx512fp16" )] |
8353 | #[cfg_attr (test, assert_instr(vmaxph))] |
8354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8355 | pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h { |
8356 | _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) |
8357 | } |
8358 | |
8359 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8360 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8361 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8362 | /// NaN or signed-zero values. |
8363 | /// |
8364 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph) |
8365 | #[inline ] |
8366 | #[target_feature (enable = "avx512fp16" )] |
8367 | #[cfg_attr (test, assert_instr(vmaxph))] |
8368 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8369 | pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8370 | unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) } |
8371 | } |
8372 | |
8373 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8374 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8375 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8376 | /// NaN or signed-zero values. |
8377 | /// |
8378 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph) |
8379 | #[inline ] |
8380 | #[target_feature (enable = "avx512fp16" )] |
8381 | #[cfg_attr (test, assert_instr(vmaxph))] |
8382 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8383 | pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8384 | unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) } |
8385 | } |
8386 | |
8387 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8388 | /// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
8389 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are |
8390 | /// NaN or signed-zero values. |
8391 | /// |
8392 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph) |
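/// # Example
///
/// A minimal sketch passing `_MM_FROUND_NO_EXC` as the `SAE` const generic
/// (illustrative; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(-0.5);
/// let b = _mm512_set1_ph(0.25);
/// let r = _mm512_max_round_ph::<_MM_FROUND_NO_EXC>(a, b); // every lane = 0.25
/// ```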
8393 | #[inline ] |
8394 | #[target_feature (enable = "avx512fp16" )] |
8395 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8396 | #[rustc_legacy_const_generics (2)] |
8397 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8398 | pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h { |
8399 | unsafe { |
8400 | static_assert_sae!(SAE); |
8401 | vmaxph_512(a, b, SAE) |
8402 | } |
8403 | } |
8404 | |
8405 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8406 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8407 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8408 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8409 | /// |
8410 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph) |
8411 | #[inline ] |
8412 | #[target_feature (enable = "avx512fp16" )] |
8413 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8414 | #[rustc_legacy_const_generics (4)] |
8415 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8416 | pub fn _mm512_mask_max_round_ph<const SAE: i32>( |
8417 | src: __m512h, |
8418 | k: __mmask32, |
8419 | a: __m512h, |
8420 | b: __m512h, |
8421 | ) -> __m512h { |
8422 | unsafe { |
8423 | static_assert_sae!(SAE); |
8424 | simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src) |
8425 | } |
8426 | } |
8427 | |
8428 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum |
8429 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8430 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8431 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8432 | /// |
8433 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph) |
8434 | #[inline ] |
8435 | #[target_feature (enable = "avx512fp16" )] |
8436 | #[cfg_attr (test, assert_instr(vmaxph, SAE = 8))] |
8437 | #[rustc_legacy_const_generics (3)] |
8438 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8439 | pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8440 | unsafe { |
8441 | static_assert_sae!(SAE); |
8442 | simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph()) |
8443 | } |
8444 | } |
8445 | |
8446 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum |
8447 | /// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
8448 | /// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value |
8449 | /// when inputs are NaN or signed-zero values. |
8450 | /// |
8451 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh) |
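/// # Example
///
/// A minimal sketch (illustrative; assumed feature set as above):
///
/// ```ignore
/// let a = _mm_set_sh(1.0);
/// let b = _mm_set_sh(2.0);
/// let r = _mm_max_sh(a, b); // lane 0 = 2.0; lanes 1..=7 come from `a`, i.e. 0.0
/// ```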
8452 | #[inline ] |
8453 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8454 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8455 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8456 | pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h { |
8457 | _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b) |
8458 | } |
8459 | |
8460 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum |
8461 | /// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 |
8462 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow |
8463 | /// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8464 | /// |
8465 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh) |
8466 | #[inline ] |
8467 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8468 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8470 | pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8471 | _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8472 | } |
8473 | |
8474 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8475 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8476 | /// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard |
8477 | /// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8478 | /// |
8479 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh) |
8480 | #[inline ] |
8481 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8482 | #[cfg_attr (test, assert_instr(vmaxsh))] |
8483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8484 | pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8485 | _mm_mask_max_sh(_mm_setzero_ph(), k, a, b) |
8486 | } |
8487 | |
8488 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8489 | /// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
8490 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8491 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8492 | /// |
8493 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh) |
8494 | #[inline ] |
8495 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8496 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8497 | #[rustc_legacy_const_generics (2)] |
8498 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8499 | pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
8500 | static_assert_sae!(SAE); |
8501 | _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b) |
8502 | } |
8503 | |
8504 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8505 | /// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
8506 | /// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8507 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8508 | /// (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8509 | /// |
8510 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh) |
8511 | #[inline ] |
8512 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8513 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8514 | #[rustc_legacy_const_generics (4)] |
8515 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8516 | pub fn _mm_mask_max_round_sh<const SAE: i32>( |
8517 | src: __m128h, |
8518 | k: __mmask8, |
8519 | a: __m128h, |
8520 | b: __m128h, |
8521 | ) -> __m128h { |
8522 | unsafe { |
8523 | static_assert_sae!(SAE); |
8524 | vmaxsh(a, b, src, k, SAE) |
8525 | } |
8526 | } |
8527 | |
8528 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value |
8529 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8530 | /// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8531 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8532 | /// (IEEE 754) maximum value when inputs are NaN or signed-zero values. |
8533 | /// |
8534 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh) |
8535 | #[inline ] |
8536 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8537 | #[cfg_attr (test, assert_instr(vmaxsh, SAE = 8))] |
8538 | #[rustc_legacy_const_generics (3)] |
8539 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8540 | pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8541 | static_assert_sae!(SAE); |
8542 | _mm_mask_max_round_sh::<SAE>(_mm_setzero_ph(), k, a, b) |
8543 | } |
8544 | |
8545 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8546 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8547 | /// when inputs are NaN or signed-zero values. |
8548 | /// |
8549 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph) |
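/// # Example
///
/// A minimal sketch (illustrative; assumes `avx512fp16` and `avx512vl`):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let r = _mm_min_ph(a, b); // every lane = 1.0
/// ```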
8550 | #[inline ] |
8551 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8552 | #[cfg_attr (test, assert_instr(vminph))] |
8553 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8554 | pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { |
8555 | unsafe { vminph_128(a, b) } |
8556 | } |
8557 | |
8558 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8559 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8560 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8561 | /// NaN or signed-zero values. |
8562 | /// |
8563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph) |
8564 | #[inline ] |
8565 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8566 | #[cfg_attr (test, assert_instr(vminph))] |
8567 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8568 | pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8569 | unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) } |
8570 | } |
8571 | |
8572 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8573 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8574 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8575 | /// NaN or signed-zero values. |
8576 | /// |
8577 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph) |
8578 | #[inline ] |
8579 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8580 | #[cfg_attr (test, assert_instr(vminph))] |
8581 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8582 | pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8583 | unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) } |
8584 | } |
8585 | |
8586 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8587 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8588 | /// when inputs are NaN or signed-zero values. |
8589 | /// |
8590 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph) |
8591 | #[inline ] |
8592 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8593 | #[cfg_attr (test, assert_instr(vminph))] |
8594 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8595 | pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { |
8596 | unsafe { vminph_256(a, b) } |
8597 | } |
8598 | |
8599 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8600 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8601 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8602 | /// NaN or signed-zero values. |
8603 | /// |
8604 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph) |
8605 | #[inline ] |
8606 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8607 | #[cfg_attr (test, assert_instr(vminph))] |
8608 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8609 | pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8610 | unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) } |
8611 | } |
8612 | |
8613 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8614 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8615 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8616 | /// NaN or signed-zero values. |
8617 | /// |
8618 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph) |
8619 | #[inline ] |
8620 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8621 | #[cfg_attr (test, assert_instr(vminph))] |
8622 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8623 | pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
8624 | unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) } |
8625 | } |
8626 | |
8627 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8628 | /// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value |
8629 | /// when inputs are NaN or signed-zero values. |
8630 | /// |
8631 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph) |
8632 | #[inline ] |
8633 | #[target_feature (enable = "avx512fp16" )] |
8634 | #[cfg_attr (test, assert_instr(vminph))] |
8635 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8636 | pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { |
8637 | _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) |
8638 | } |
8639 | |
8640 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8641 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8642 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8643 | /// NaN or signed-zero values. |
8644 | /// |
8645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph) |
8646 | #[inline ] |
8647 | #[target_feature (enable = "avx512fp16" )] |
8648 | #[cfg_attr (test, assert_instr(vminph))] |
8649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8650 | pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8651 | unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) } |
8652 | } |
8653 | |
8654 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8655 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8656 | /// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are |
8657 | /// NaN or signed-zero values. |
8658 | /// |
8659 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph) |
8660 | #[inline ] |
8661 | #[target_feature (enable = "avx512fp16" )] |
8662 | #[cfg_attr (test, assert_instr(vminph))] |
8663 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8664 | pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8665 | unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) } |
8666 | } |
8667 | |
8668 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8669 | /// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not |
8670 | /// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8671 | /// |
8672 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph) |
8673 | #[inline ] |
8674 | #[target_feature (enable = "avx512fp16" )] |
8675 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8676 | #[rustc_legacy_const_generics (2)] |
8677 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8678 | pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h { |
8679 | unsafe { |
8680 | static_assert_sae!(SAE); |
8681 | vminph_512(a, b, SAE) |
8682 | } |
8683 | } |
8684 | |
8685 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8686 | /// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
8687 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8688 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8689 | /// |
8690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph) |
8691 | #[inline ] |
8692 | #[target_feature (enable = "avx512fp16" )] |
8693 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8694 | #[rustc_legacy_const_generics (4)] |
8695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8696 | pub fn _mm512_mask_min_round_ph<const SAE: i32>( |
8697 | src: __m512h, |
8698 | k: __mmask32, |
8699 | a: __m512h, |
8700 | b: __m512h, |
8701 | ) -> __m512h { |
8702 | unsafe { |
8703 | static_assert_sae!(SAE); |
8704 | simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src) |
8705 | } |
8706 | } |
8707 | |
8708 | /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum |
8709 | /// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
8710 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8711 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8712 | /// |
8713 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph) |
8714 | #[inline ] |
8715 | #[target_feature (enable = "avx512fp16" )] |
8716 | #[cfg_attr (test, assert_instr(vminph, SAE = 8))] |
8717 | #[rustc_legacy_const_generics (3)] |
8718 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8719 | pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
8720 | unsafe { |
8721 | static_assert_sae!(SAE); |
8722 | simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph()) |
8723 | } |
8724 | } |
8725 | |
8726 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum |
8727 | /// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
8728 | /// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when |
8729 | /// inputs are NaN or signed-zero values. |
8730 | /// |
8731 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh) |
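/// # Example
///
/// A minimal sketch (illustrative; assumed feature set as above):
///
/// ```ignore
/// let a = _mm_set_sh(5.0);
/// let b = _mm_set_sh(-3.0);
/// let r = _mm_min_sh(a, b); // lane 0 = -3.0; lanes 1..=7 come from `a`, i.e. 0.0
/// ```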
8732 | #[inline ] |
8733 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8734 | #[cfg_attr (test, assert_instr(vminsh))] |
8735 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8736 | pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { |
8737 | _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b) |
8738 | } |
8739 | |
8740 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum |
8741 | /// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 |
8742 | /// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow |
8743 | /// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8744 | /// |
8745 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh) |
8746 | #[inline ] |
8747 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8748 | #[cfg_attr (test, assert_instr(vminsh))] |
8749 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8750 | pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8751 | _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
8752 | } |
8753 | |
8754 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8755 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8756 | /// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard |
8757 | /// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8758 | /// |
8759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh) |
8760 | #[inline ] |
8761 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8762 | #[cfg_attr (test, assert_instr(vminsh))] |
8763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8764 | pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8765 | _mm_mask_min_sh(_mm_setzero_ph(), k, a, b) |
8766 | } |
8767 | |
8768 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8769 | /// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst. |
8770 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the |
8771 | /// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8772 | /// |
8773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh) |
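///
/// Illustrative sketch (assumes `avx512fp16` is available; values are arbitrary); passing
/// `_MM_FROUND_NO_EXC` suppresses floating-point exception reporting for the comparison:
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(-1.0);
/// let r = _mm_min_round_sh::<_MM_FROUND_NO_EXC>(a, b);
/// // lane 0 of r is -1.0; the upper lanes are copied from a
/// ```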
8774 | #[inline ] |
8775 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8776 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8777 | #[rustc_legacy_const_generics (2)] |
8778 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8779 | pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
8780 | static_assert_sae!(SAE); |
    _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
8782 | } |
8783 | |
8784 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8785 | /// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
8786 | /// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8787 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8788 | /// (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8789 | /// |
8790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh) |
8791 | #[inline ] |
8792 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8793 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8794 | #[rustc_legacy_const_generics (4)] |
8795 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8796 | pub fn _mm_mask_min_round_sh<const SAE: i32>( |
8797 | src: __m128h, |
8798 | k: __mmask8, |
8799 | a: __m128h, |
8800 | b: __m128h, |
8801 | ) -> __m128h { |
8802 | unsafe { |
8803 | static_assert_sae!(SAE); |
8804 | vminsh(a, b, src, k, SAE) |
8805 | } |
8806 | } |
8807 | |
8808 | /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value |
8809 | /// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and |
8810 | /// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by |
8811 | /// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic |
8812 | /// (IEEE 754) minimum value when inputs are NaN or signed-zero values. |
8813 | /// |
8814 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh) |
8815 | #[inline ] |
8816 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8817 | #[cfg_attr (test, assert_instr(vminsh, SAE = 8))] |
8818 | #[rustc_legacy_const_generics (3)] |
8819 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8820 | pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
8821 | static_assert_sae!(SAE); |
    _mm_mask_min_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
8823 | } |
8824 | |
8825 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8826 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8827 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8828 | /// |
8829 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph) |
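///
/// Illustrative sketch (assumes `avx512fp16` and `avx512vl` are available; values are
/// arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(8.0);
/// let r = _mm_getexp_ph(a);
/// // every lane of r is 3.0, since floor(log2(8.0)) == 3
/// ```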
8830 | #[inline ] |
8831 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8832 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8833 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8834 | pub fn _mm_getexp_ph(a: __m128h) -> __m128h { |
    _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
8836 | } |
8837 | |
8838 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8839 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8840 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8841 | /// `floor(log2(x))` for each element. |
8842 | /// |
8843 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph) |
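///
/// Illustrative sketch of the writemask behaviour (assumes `avx512fp16` and `avx512vl` are
/// available; values are arbitrary):
///
/// ```ignore
/// let src = _mm_set1_ph(-1.0);
/// let a = _mm_set1_ph(8.0);
/// let r = _mm_mask_getexp_ph(src, 0b0000_0011, a);
/// // lanes 0 and 1 hold the computed exponent 3.0; lanes 2..=7 are copied from src (-1.0)
/// ```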
8844 | #[inline ] |
8845 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8846 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8847 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8848 | pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
8849 | unsafe { vgetexpph_128(a, src, k) } |
8850 | } |
8851 | |
8852 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8853 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8854 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8855 | /// `floor(log2(x))` for each element. |
8856 | /// |
8857 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph) |
8858 | #[inline ] |
8859 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8860 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8861 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8862 | pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { |
    _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
8864 | } |
8865 | |
8866 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8867 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8868 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8869 | /// |
8870 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph) |
8871 | #[inline ] |
8872 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8873 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8874 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8875 | pub fn _mm256_getexp_ph(a: __m256h) -> __m256h { |
    _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
8877 | } |
8878 | |
8879 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8880 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8881 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8882 | /// `floor(log2(x))` for each element. |
8883 | /// |
8884 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph) |
8885 | #[inline ] |
8886 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8887 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8888 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8889 | pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
8890 | unsafe { vgetexpph_256(a, src, k) } |
8891 | } |
8892 | |
8893 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8894 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8895 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8896 | /// `floor(log2(x))` for each element. |
8897 | /// |
8898 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph) |
8899 | #[inline ] |
8900 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
8901 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8902 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8903 | pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { |
    _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
8905 | } |
8906 | |
8907 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8908 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8909 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. |
8910 | /// |
8911 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph) |
8912 | #[inline ] |
8913 | #[target_feature (enable = "avx512fp16" )] |
8914 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8915 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8916 | pub fn _mm512_getexp_ph(a: __m512h) -> __m512h { |
    _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
8918 | } |
8919 | |
8920 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8921 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8922 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8923 | /// `floor(log2(x))` for each element. |
8924 | /// |
8925 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph) |
8926 | #[inline ] |
8927 | #[target_feature (enable = "avx512fp16" )] |
8928 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8929 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8930 | pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
8931 | _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a) |
8932 | } |
8933 | |
8934 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8935 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8936 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8937 | /// `floor(log2(x))` for each element. |
8938 | /// |
8939 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph) |
8940 | #[inline ] |
8941 | #[target_feature (enable = "avx512fp16" )] |
8942 | #[cfg_attr (test, assert_instr(vgetexpph))] |
8943 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8944 | pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { |
    _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
8946 | } |
8947 | |
8948 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8949 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst. |
8950 | /// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed |
8951 | /// by passing _MM_FROUND_NO_EXC in the sae parameter |
8952 | /// |
8953 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph) |
8954 | #[inline ] |
8955 | #[target_feature (enable = "avx512fp16" )] |
8956 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8957 | #[rustc_legacy_const_generics (1)] |
8958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8959 | pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h { |
8960 | static_assert_sae!(SAE); |
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
8962 | } |
8963 | |
8964 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8965 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k |
8966 | /// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates |
8967 | /// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
8968 | /// |
8969 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph) |
8970 | #[inline ] |
8971 | #[target_feature (enable = "avx512fp16" )] |
8972 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8973 | #[rustc_legacy_const_generics (3)] |
8974 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8975 | pub fn _mm512_mask_getexp_round_ph<const SAE: i32>( |
8976 | src: __m512h, |
8977 | k: __mmask32, |
8978 | a: __m512h, |
8979 | ) -> __m512h { |
8980 | unsafe { |
8981 | static_assert_sae!(SAE); |
8982 | vgetexpph_512(a, src, k, SAE) |
8983 | } |
8984 | } |
8985 | |
8986 | /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision |
8987 | /// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask |
8988 | /// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates |
8989 | /// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
8990 | /// |
8991 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph) |
8992 | #[inline ] |
8993 | #[target_feature (enable = "avx512fp16" )] |
8994 | #[cfg_attr (test, assert_instr(vgetexpph, SAE = 8))] |
8995 | #[rustc_legacy_const_generics (2)] |
8996 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
8997 | pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h { |
8998 | static_assert_sae!(SAE); |
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
9000 | } |
9001 | |
9002 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9003 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9004 | /// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially |
9005 | /// calculates `floor(log2(x))` for the lower element. |
9006 | /// |
9007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh) |
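///
/// Illustrative sketch (assumes `avx512fp16` is available; values are arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(32.0);
/// let r = _mm_getexp_sh(a, b);
/// // lane 0 of r is 5.0 (floor(log2(32.0))); lanes 1..=7 are copied from a
/// ```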
9008 | #[inline ] |
9009 | #[target_feature (enable = "avx512fp16" )] |
9010 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9011 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9012 | pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_getexp_sh(_mm_undefined_ph(), 0xff, a, b)
9014 | } |
9015 | |
9016 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9017 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9018 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 |
9019 | /// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` |
9020 | /// for the lower element. |
9021 | /// |
9022 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh) |
9023 | #[inline ] |
9024 | #[target_feature (enable = "avx512fp16" )] |
9025 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9026 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9027 | pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9028 | _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
9029 | } |
9030 | |
9031 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9032 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9033 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
9034 | /// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the |
9035 | /// lower element. |
9036 | /// |
9037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh) |
9038 | #[inline ] |
9039 | #[target_feature (enable = "avx512fp16" )] |
9040 | #[cfg_attr (test, assert_instr(vgetexpsh))] |
9041 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9042 | pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_getexp_sh(_mm_setzero_ph(), k, a, b)
9044 | } |
9045 | |
9046 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9047 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9048 | /// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially |
9049 | /// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9050 | /// in the sae parameter |
9051 | /// |
9052 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh) |
9053 | #[inline ] |
9054 | #[target_feature (enable = "avx512fp16" )] |
9055 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9056 | #[rustc_legacy_const_generics (2)] |
9057 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9058 | pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
9059 | static_assert_sae!(SAE); |
    _mm_mask_getexp_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
9061 | } |
9062 | |
9063 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9064 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9065 | /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7 |
9066 | /// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` |
9067 | /// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9068 | /// |
9069 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh) |
9070 | #[inline ] |
9071 | #[target_feature (enable = "avx512fp16" )] |
9072 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9073 | #[rustc_legacy_const_generics (4)] |
9074 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9075 | pub fn _mm_mask_getexp_round_sh<const SAE: i32>( |
9076 | src: __m128h, |
9077 | k: __mmask8, |
9078 | a: __m128h, |
9079 | b: __m128h, |
9080 | ) -> __m128h { |
9081 | unsafe { |
9082 | static_assert_sae!(SAE); |
9083 | vgetexpsh(a, b, src, k, SAE) |
9084 | } |
9085 | } |
9086 | |
9087 | /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision |
9088 | /// (16-bit) floating-point number representing the integer exponent, store the result in the lower element |
9089 | /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed |
9090 | /// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the |
9091 | /// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9092 | /// |
9093 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh) |
9094 | #[inline ] |
9095 | #[target_feature (enable = "avx512fp16" )] |
9096 | #[cfg_attr (test, assert_instr(vgetexpsh, SAE = 8))] |
9097 | #[rustc_legacy_const_generics (3)] |
9098 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9099 | pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
9100 | static_assert_sae!(SAE); |
    _mm_mask_getexp_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
9102 | } |
9103 | |
9104 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9105 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9106 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9107 | /// |
9108 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9109 | /// |
9110 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9111 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9112 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9113 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9114 | /// |
9115 | /// The sign is determined by sc which can take the following values: |
9116 | /// |
9117 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9118 | /// _MM_MANT_SIGN_zero // sign = 0 |
9119 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9120 | /// |
9121 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph) |
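///
/// Illustrative sketch (assumes `avx512fp16` and `avx512vl` are available; values are
/// arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(12.0);
/// let r = _mm_getmant_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
/// // every lane of r is 1.5: 12.0 == 1.5 * 2^3, and the mantissa is normalized to [1, 2)
/// ```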
9122 | #[inline ] |
9123 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9124 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9125 | #[rustc_legacy_const_generics (1, 2)] |
9126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9127 | pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9128 | a: __m128h, |
9129 | ) -> __m128h { |
9130 | static_assert_uimm_bits!(NORM, 4); |
9131 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
9133 | } |
9134 | |
9135 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9136 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9137 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9138 | /// by norm and the sign depends on sign and the source sign. |
9139 | /// |
9140 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9141 | /// |
9142 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9143 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9144 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9145 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9146 | /// |
9147 | /// The sign is determined by sc which can take the following values: |
9148 | /// |
9149 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9150 | /// _MM_MANT_SIGN_zero // sign = 0 |
9151 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9152 | /// |
9153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph) |
9154 | #[inline ] |
9155 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9156 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9157 | #[rustc_legacy_const_generics (3, 4)] |
9158 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9159 | pub fn _mm_mask_getmant_ph< |
9160 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9161 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9162 | >( |
9163 | src: __m128h, |
9164 | k: __mmask8, |
9165 | a: __m128h, |
9166 | ) -> __m128h { |
9167 | unsafe { |
9168 | static_assert_uimm_bits!(NORM, 4); |
9169 | static_assert_uimm_bits!(SIGN, 2); |
9170 | vgetmantph_128(a, (SIGN << 2) | NORM, src, k) |
9171 | } |
9172 | } |
9173 | |
9174 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9175 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9176 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9177 | /// by norm and the sign depends on sign and the source sign. |
9178 | /// |
9179 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9180 | /// |
9181 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9182 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9183 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9184 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9185 | /// |
9186 | /// The sign is determined by sc which can take the following values: |
9187 | /// |
9188 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9189 | /// _MM_MANT_SIGN_zero // sign = 0 |
9190 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9191 | /// |
9192 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph) |
9193 | #[inline ] |
9194 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9195 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9196 | #[rustc_legacy_const_generics (2, 3)] |
9197 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9198 | pub fn _mm_maskz_getmant_ph< |
9199 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9200 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9201 | >( |
9202 | k: __mmask8, |
9203 | a: __m128h, |
9204 | ) -> __m128h { |
9205 | static_assert_uimm_bits!(NORM, 4); |
9206 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
9208 | } |
9209 | |
9210 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9211 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9212 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9213 | /// |
9214 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9215 | /// |
9216 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9217 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9218 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9219 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9220 | /// |
9221 | /// The sign is determined by sc which can take the following values: |
9222 | /// |
9223 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9224 | /// _MM_MANT_SIGN_zero // sign = 0 |
9225 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9226 | /// |
9227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph) |
9228 | #[inline ] |
9229 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9230 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9231 | #[rustc_legacy_const_generics (1, 2)] |
9232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9233 | pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9234 | a: __m256h, |
9235 | ) -> __m256h { |
9236 | static_assert_uimm_bits!(NORM, 4); |
9237 | static_assert_uimm_bits!(SIGN, 2); |
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
9239 | } |
9240 | |
9241 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9242 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9243 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9244 | /// by norm and the sign depends on sign and the source sign. |
9245 | /// |
9246 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9247 | /// |
9248 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9249 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9250 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9251 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9252 | /// |
9253 | /// The sign is determined by sc which can take the following values: |
9254 | /// |
9255 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9256 | /// _MM_MANT_SIGN_zero // sign = 0 |
9257 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9258 | /// |
9259 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph) |
9260 | #[inline ] |
9261 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9262 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9263 | #[rustc_legacy_const_generics (3, 4)] |
9264 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9265 | pub fn _mm256_mask_getmant_ph< |
9266 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9267 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9268 | >( |
9269 | src: __m256h, |
9270 | k: __mmask16, |
9271 | a: __m256h, |
9272 | ) -> __m256h { |
9273 | unsafe { |
9274 | static_assert_uimm_bits!(NORM, 4); |
9275 | static_assert_uimm_bits!(SIGN, 2); |
9276 | vgetmantph_256(a, (SIGN << 2) | NORM, src, k) |
9277 | } |
9278 | } |
9279 | |
9280 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9281 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9282 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9283 | /// by norm and the sign depends on sign and the source sign. |
9284 | /// |
9285 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9286 | /// |
9287 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9288 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9289 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9290 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9291 | /// |
9292 | /// The sign is determined by sc which can take the following values: |
9293 | /// |
9294 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9295 | /// _MM_MANT_SIGN_zero // sign = 0 |
9296 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9297 | /// |
9298 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph) |
9299 | #[inline ] |
9300 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9301 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9302 | #[rustc_legacy_const_generics (2, 3)] |
9303 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9304 | pub fn _mm256_maskz_getmant_ph< |
9305 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9306 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9307 | >( |
9308 | k: __mmask16, |
9309 | a: __m256h, |
9310 | ) -> __m256h { |
9311 | static_assert_uimm_bits!(NORM, 4); |
9312 | static_assert_uimm_bits!(SIGN, 2); |
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
9314 | } |
9315 | |
9316 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9317 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9318 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9319 | /// |
9320 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9321 | /// |
9322 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9323 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9324 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9325 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9326 | /// |
9327 | /// The sign is determined by sc which can take the following values: |
9328 | /// |
9329 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9330 | /// _MM_MANT_SIGN_zero // sign = 0 |
9331 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9332 | /// |
9333 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph) |
9334 | #[inline ] |
9335 | #[target_feature (enable = "avx512fp16" )] |
9336 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9337 | #[rustc_legacy_const_generics (1, 2)] |
9338 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9339 | pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9340 | a: __m512h, |
9341 | ) -> __m512h { |
9342 | static_assert_uimm_bits!(NORM, 4); |
9343 | static_assert_uimm_bits!(SIGN, 2); |
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
9345 | } |
9346 | |
9347 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9348 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9349 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9350 | /// by norm and the sign depends on sign and the source sign. |
9351 | /// |
9352 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9353 | /// |
9354 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9355 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9356 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9357 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9358 | /// |
9359 | /// The sign is determined by sc which can take the following values: |
9360 | /// |
9361 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9362 | /// _MM_MANT_SIGN_zero // sign = 0 |
9363 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9364 | /// |
9365 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph) |
9366 | #[inline ] |
9367 | #[target_feature (enable = "avx512fp16" )] |
9368 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9369 | #[rustc_legacy_const_generics (3, 4)] |
9370 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9371 | pub fn _mm512_mask_getmant_ph< |
9372 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9373 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9374 | >( |
9375 | src: __m512h, |
9376 | k: __mmask32, |
9377 | a: __m512h, |
9378 | ) -> __m512h { |
9379 | static_assert_uimm_bits!(NORM, 4); |
9380 | static_assert_uimm_bits!(SIGN, 2); |
9381 | _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
9382 | } |
9383 | |
9384 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9385 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9386 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9387 | /// by norm and the sign depends on sign and the source sign. |
9388 | /// |
9389 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9390 | /// |
9391 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9392 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9393 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9394 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9395 | /// |
9396 | /// The sign is determined by sc which can take the following values: |
9397 | /// |
9398 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9399 | /// _MM_MANT_SIGN_zero // sign = 0 |
9400 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9401 | /// |
9402 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph) |
9403 | #[inline ] |
9404 | #[target_feature (enable = "avx512fp16" )] |
9405 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] |
9406 | #[rustc_legacy_const_generics (2, 3)] |
9407 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9408 | pub fn _mm512_maskz_getmant_ph< |
9409 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9410 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9411 | >( |
9412 | k: __mmask32, |
9413 | a: __m512h, |
9414 | ) -> __m512h { |
9415 | static_assert_uimm_bits!(NORM, 4); |
9416 | static_assert_uimm_bits!(SIGN, 2); |
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
9418 | } |
9419 | |
9420 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9421 | /// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9422 | /// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can |
9423 | /// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9424 | /// |
9425 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9426 | /// |
9427 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9428 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9429 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9430 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9431 | /// |
9432 | /// The sign is determined by sc which can take the following values: |
9433 | /// |
9434 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9435 | /// _MM_MANT_SIGN_zero // sign = 0 |
9436 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9437 | /// |
9438 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9439 | /// |
9440 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph) |
9441 | #[inline ] |
9442 | #[target_feature (enable = "avx512fp16" )] |
9443 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9444 | #[rustc_legacy_const_generics (1, 2, 3)] |
9445 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9446 | pub fn _mm512_getmant_round_ph< |
9447 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9448 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9449 | const SAE: i32, |
9450 | >( |
9451 | a: __m512h, |
9452 | ) -> __m512h { |
9453 | static_assert_uimm_bits!(NORM, 4); |
9454 | static_assert_uimm_bits!(SIGN, 2); |
9455 | static_assert_sae!(SAE); |
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
9457 | } |
9458 | |
9459 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9460 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9461 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9462 | /// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9463 | /// in the sae parameter |
9464 | /// |
9465 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9466 | /// |
9467 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9468 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9469 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9470 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9471 | /// |
9472 | /// The sign is determined by sc which can take the following values: |
9473 | /// |
9474 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9475 | /// _MM_MANT_SIGN_zero // sign = 0 |
9476 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9477 | /// |
9478 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9479 | /// |
9480 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph) |
9481 | #[inline ] |
9482 | #[target_feature (enable = "avx512fp16" )] |
9483 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9484 | #[rustc_legacy_const_generics (3, 4, 5)] |
9485 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9486 | pub fn _mm512_mask_getmant_round_ph< |
9487 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9488 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9489 | const SAE: i32, |
9490 | >( |
9491 | src: __m512h, |
9492 | k: __mmask32, |
9493 | a: __m512h, |
9494 | ) -> __m512h { |
9495 | unsafe { |
9496 | static_assert_uimm_bits!(NORM, 4); |
9497 | static_assert_uimm_bits!(SIGN, 2); |
9498 | static_assert_sae!(SAE); |
9499 | vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE) |
9500 | } |
9501 | } |
9502 | |
9503 | /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store |
9504 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
9505 | /// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined |
9506 | /// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
9507 | /// in the sae parameter |
9508 | /// |
9509 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9510 | /// |
9511 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9512 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9513 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9514 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9515 | /// |
9516 | /// The sign is determined by sc which can take the following values: |
9517 | /// |
9518 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9519 | /// _MM_MANT_SIGN_zero // sign = 0 |
9520 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9521 | /// |
9522 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9523 | /// |
9524 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph) |
9525 | #[inline ] |
9526 | #[target_feature (enable = "avx512fp16" )] |
9527 | #[cfg_attr (test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] |
9528 | #[rustc_legacy_const_generics (2, 3, 4)] |
9529 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9530 | pub fn _mm512_maskz_getmant_round_ph< |
9531 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9532 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9533 | const SAE: i32, |
9534 | >( |
9535 | k: __mmask32, |
9536 | a: __m512h, |
9537 | ) -> __m512h { |
9538 | static_assert_uimm_bits!(NORM, 4); |
9539 | static_assert_uimm_bits!(SIGN, 2); |
9540 | static_assert_sae!(SAE); |
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
9542 | } |
9543 | |
9544 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9545 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
9546 | /// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9547 | /// on the interval range defined by norm and the sign depends on sign and the source sign. |
9548 | /// |
9549 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9550 | /// |
9551 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9552 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9553 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9554 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9555 | /// |
9556 | /// The sign is determined by sc which can take the following values: |
9557 | /// |
9558 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9559 | /// _MM_MANT_SIGN_zero // sign = 0 |
9560 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9561 | /// |
9562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh) |
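///
/// Illustrative sketch (assumes `avx512fp16` is available; values are arbitrary):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(12.0);
/// let r = _mm_getmant_sh::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
/// // lane 0 of r is 1.5 (12.0 == 1.5 * 2^3); lanes 1..=7 are copied from a
/// ```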
9563 | #[inline ] |
9564 | #[target_feature (enable = "avx512fp16" )] |
9565 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9566 | #[rustc_legacy_const_generics (2, 3)] |
9567 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9568 | pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>( |
9569 | a: __m128h, |
9570 | b: __m128h, |
9571 | ) -> __m128h { |
9572 | static_assert_uimm_bits!(NORM, 4); |
9573 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a, b)
9575 | } |
9576 | |
9577 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9578 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
9579 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9580 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9581 | /// the source sign. |
9582 | /// |
9583 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9584 | /// |
9585 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9586 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9587 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9588 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9589 | /// |
9590 | /// The sign is determined by sc which can take the following values: |
9591 | /// |
9592 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9593 | /// _MM_MANT_SIGN_zero // sign = 0 |
9594 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9595 | /// |
9596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh) |
9597 | #[inline ] |
9598 | #[target_feature (enable = "avx512fp16" )] |
9599 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9600 | #[rustc_legacy_const_generics (4, 5)] |
9601 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9602 | pub fn _mm_mask_getmant_sh< |
9603 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9604 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9605 | >( |
9606 | src: __m128h, |
9607 | k: __mmask8, |
9608 | a: __m128h, |
9609 | b: __m128h, |
9610 | ) -> __m128h { |
9611 | static_assert_uimm_bits!(NORM, 4); |
9612 | static_assert_uimm_bits!(SIGN, 2); |
9613 | _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
9614 | } |
9615 | |
9616 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9617 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
9618 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9619 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9620 | /// the source sign. |
9621 | /// |
9622 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9623 | /// |
9624 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9625 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9626 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9627 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9628 | /// |
9629 | /// The sign is determined by sc which can take the following values: |
9630 | /// |
9631 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9632 | /// _MM_MANT_SIGN_zero // sign = 0 |
9633 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9634 | /// |
9635 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh) |
9636 | #[inline ] |
9637 | #[target_feature (enable = "avx512fp16" )] |
9638 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] |
9639 | #[rustc_legacy_const_generics (3, 4)] |
9640 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9641 | pub fn _mm_maskz_getmant_sh< |
9642 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9643 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9644 | >( |
9645 | k: __mmask8, |
9646 | a: __m128h, |
9647 | b: __m128h, |
9648 | ) -> __m128h { |
9649 | static_assert_uimm_bits!(NORM, 4); |
9650 | static_assert_uimm_bits!(SIGN, 2); |
    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_setzero_ph(), k, a, b)
9652 | } |
9653 | |
9654 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9655 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
9656 | /// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends |
9657 | /// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can |
9658 | /// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9659 | /// |
9660 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9661 | /// |
9662 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9663 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9664 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9665 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9666 | /// |
9667 | /// The sign is determined by sc which can take the following values: |
9668 | /// |
9669 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9670 | /// _MM_MANT_SIGN_zero // sign = 0 |
9671 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9672 | /// |
9673 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9674 | /// |
9675 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh) |
9676 | #[inline ] |
9677 | #[target_feature (enable = "avx512fp16" )] |
9678 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9679 | #[rustc_legacy_const_generics (2, 3, 4)] |
9680 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9681 | pub fn _mm_getmant_round_sh< |
9682 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9683 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9684 | const SAE: i32, |
9685 | >( |
9686 | a: __m128h, |
9687 | b: __m128h, |
9688 | ) -> __m128h { |
9689 | static_assert_uimm_bits!(NORM, 4); |
9690 | static_assert_uimm_bits!(SIGN, 2); |
9691 | static_assert_sae!(SAE); |
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_undefined_ph(), 0xff, a, b)
9693 | } |
9694 | |
9695 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9696 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
9697 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9698 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9699 | /// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9700 | /// |
9701 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9702 | /// |
9703 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9704 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9705 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9706 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9707 | /// |
9708 | /// The sign is determined by sc which can take the following values: |
9709 | /// |
9710 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9711 | /// _MM_MANT_SIGN_zero // sign = 0 |
9712 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9713 | /// |
9714 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9715 | /// |
9716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh) |
9717 | #[inline ] |
9718 | #[target_feature (enable = "avx512fp16" )] |
9719 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9720 | #[rustc_legacy_const_generics (4, 5, 6)] |
9721 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9722 | pub fn _mm_mask_getmant_round_sh< |
9723 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9724 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9725 | const SAE: i32, |
9726 | >( |
9727 | src: __m128h, |
9728 | k: __mmask8, |
9729 | a: __m128h, |
9730 | b: __m128h, |
9731 | ) -> __m128h { |
9732 | unsafe { |
9733 | static_assert_uimm_bits!(NORM, 4); |
9734 | static_assert_uimm_bits!(SIGN, 2); |
9735 | static_assert_sae!(SAE); |
9736 | vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE) |
9737 | } |
9738 | } |
9739 | |
9740 | /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store |
9741 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
9742 | /// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates |
9743 | /// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and |
9744 | /// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9745 | /// |
9746 | /// The mantissa is normalized to the interval specified by interv, which can take the following values: |
9747 | /// |
9748 | /// _MM_MANT_NORM_1_2 // interval [1, 2) |
9749 | /// _MM_MANT_NORM_p5_2 // interval [0.5, 2) |
9750 | /// _MM_MANT_NORM_p5_1 // interval [0.5, 1) |
9751 | /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5) |
9752 | /// |
9753 | /// The sign is determined by sc which can take the following values: |
9754 | /// |
9755 | /// _MM_MANT_SIGN_src // sign = sign(src) |
9756 | /// _MM_MANT_SIGN_zero // sign = 0 |
9757 | /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1 |
9758 | /// |
9759 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
9760 | /// |
9761 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh) |
9762 | #[inline ] |
9763 | #[target_feature (enable = "avx512fp16" )] |
9764 | #[cfg_attr (test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] |
9765 | #[rustc_legacy_const_generics (3, 4, 5)] |
9766 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9767 | pub fn _mm_maskz_getmant_round_sh< |
9768 | const NORM: _MM_MANTISSA_NORM_ENUM, |
9769 | const SIGN: _MM_MANTISSA_SIGN_ENUM, |
9770 | const SAE: i32, |
9771 | >( |
9772 | k: __mmask8, |
9773 | a: __m128h, |
9774 | b: __m128h, |
9775 | ) -> __m128h { |
9776 | static_assert_uimm_bits!(NORM, 4); |
9777 | static_assert_uimm_bits!(SIGN, 2); |
9778 | static_assert_sae!(SAE); |
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_setzero_ph(), k, a, b)
9780 | } |
9781 | |
9782 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9783 | /// specified by imm8, and store the results in dst. |
9784 | /// |
9785 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9786 | /// |
9787 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9788 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9789 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9790 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9791 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9792 | /// |
9793 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph) |
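///
/// A minimal usage sketch (not a compiled doc-test), assuming a nightly toolchain with
/// `#![feature(f16, stdarch_x86_avx512_f16)]` and AVX512-FP16 hardware:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn roundscale_demo() -> __m128h {
///     let a = _mm_set1_ph(1.75);
///     // _MM_FROUND_TO_NEAREST_INT keeps zero fraction bits and rounds to nearest,
///     // so every lane of the result should hold 2.0.
///     _mm_roundscale_ph::<_MM_FROUND_TO_NEAREST_INT>(a)
/// }
/// ```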
9794 | #[inline ] |
9795 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9796 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9797 | #[rustc_legacy_const_generics (1)] |
9798 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9799 | pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h { |
9800 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
9802 | } |
9803 | |
9804 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9805 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9806 | /// the corresponding mask bit is not set). |
9807 | /// |
9808 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9809 | /// |
9810 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9811 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9812 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9813 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9814 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9815 | /// |
9816 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph) |
9817 | #[inline ] |
9818 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9819 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9820 | #[rustc_legacy_const_generics (3)] |
9821 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9822 | pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
9823 | unsafe { |
9824 | static_assert_uimm_bits!(IMM8, 8); |
9825 | vrndscaleph_128(a, IMM8, src, k) |
9826 | } |
9827 | } |
9828 | |
9829 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9830 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9831 | /// mask bit is not set). |
9832 | /// |
9833 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9834 | /// |
9835 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9836 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9837 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9838 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9839 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9840 | /// |
9841 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph) |
9842 | #[inline ] |
9843 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9844 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9845 | #[rustc_legacy_const_generics (2)] |
9846 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9847 | pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h { |
9848 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
9850 | } |
9851 | |
9852 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9853 | /// specified by imm8, and store the results in dst. |
9854 | /// |
9855 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9856 | /// |
9857 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9858 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9859 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9860 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9861 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9862 | /// |
9863 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph) |
9864 | #[inline ] |
9865 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9866 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9867 | #[rustc_legacy_const_generics (1)] |
9868 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9869 | pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h { |
9870 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
9872 | } |
9873 | |
9874 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9875 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9876 | /// the corresponding mask bit is not set). |
9877 | /// |
9878 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9879 | /// |
9880 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9881 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9882 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9883 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9884 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9885 | /// |
9886 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph) |
9887 | #[inline ] |
9888 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9889 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9890 | #[rustc_legacy_const_generics (3)] |
9891 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9892 | pub fn _mm256_mask_roundscale_ph<const IMM8: i32>( |
9893 | src: __m256h, |
9894 | k: __mmask16, |
9895 | a: __m256h, |
9896 | ) -> __m256h { |
9897 | unsafe { |
9898 | static_assert_uimm_bits!(IMM8, 8); |
9899 | vrndscaleph_256(a, IMM8, src, k) |
9900 | } |
9901 | } |
9902 | |
9903 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9904 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9905 | /// mask bit is not set). |
9906 | /// |
9907 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9908 | /// |
9909 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9910 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9911 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9912 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9913 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9914 | /// |
9915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph) |
9916 | #[inline ] |
9917 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
9918 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9919 | #[rustc_legacy_const_generics (2)] |
9920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9921 | pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h { |
9922 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
9924 | } |
9925 | |
9926 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9927 | /// specified by imm8, and store the results in dst. |
9928 | /// |
9929 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9930 | /// |
9931 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9932 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9933 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9934 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9935 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9936 | /// |
9937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph) |
9938 | #[inline ] |
9939 | #[target_feature (enable = "avx512fp16" )] |
9940 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9941 | #[rustc_legacy_const_generics (1)] |
9942 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9943 | pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h { |
9944 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
9946 | } |
9947 | |
9948 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9949 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
9950 | /// the corresponding mask bit is not set). |
9951 | /// |
9952 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9953 | /// |
9954 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9955 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9956 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9957 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9958 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9959 | /// |
9960 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph) |
9961 | #[inline ] |
9962 | #[target_feature (enable = "avx512fp16" )] |
9963 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9964 | #[rustc_legacy_const_generics (3)] |
9965 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9966 | pub fn _mm512_mask_roundscale_ph<const IMM8: i32>( |
9967 | src: __m512h, |
9968 | k: __mmask32, |
9969 | a: __m512h, |
9970 | ) -> __m512h { |
9971 | static_assert_uimm_bits!(IMM8, 8); |
9972 | _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
9973 | } |
9974 | |
9975 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9976 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
9977 | /// mask bit is not set). |
9978 | /// |
9979 | /// Rounding is done according to the imm8 parameter, which can be one of: |
9980 | /// |
9981 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
9982 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
9983 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
9984 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
9985 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
9986 | /// |
9987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph) |
9988 | #[inline ] |
9989 | #[target_feature (enable = "avx512fp16" )] |
9990 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0))] |
9991 | #[rustc_legacy_const_generics (2)] |
9992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
9993 | pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h { |
9994 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
9996 | } |
9997 | |
9998 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
9999 | /// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
10000 | /// in the sae parameter |
10001 | /// |
10002 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10003 | /// |
10004 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10005 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10006 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10007 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10008 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10009 | /// |
10010 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph) |
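///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above, combining an explicit rounding choice with exception suppression:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn roundscale_round_demo() -> __m512h {
///     let a = _mm512_set1_ph(1.75);
///     // Truncate toward zero while suppressing exceptions via the SAE parameter;
///     // every lane of the result should hold 1.0.
///     _mm512_roundscale_round_ph::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a)
/// }
/// ```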
10011 | #[inline ] |
10012 | #[target_feature (enable = "avx512fp16" )] |
10013 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10014 | #[rustc_legacy_const_generics (1, 2)] |
10015 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10016 | pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h { |
10017 | static_assert_uimm_bits!(IMM8, 8); |
10018 | static_assert_sae!(SAE); |
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10020 | } |
10021 | |
10022 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10023 | /// specified by imm8, and store the results in dst using writemask k (elements are copied from src when |
10024 | /// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
10025 | /// in the sae parameter |
10026 | /// |
10027 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10028 | /// |
10029 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10030 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10031 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10032 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10033 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10034 | /// |
10035 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph) |
10036 | #[inline ] |
10037 | #[target_feature (enable = "avx512fp16" )] |
10038 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10039 | #[rustc_legacy_const_generics (3, 4)] |
10040 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10041 | pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>( |
10042 | src: __m512h, |
10043 | k: __mmask32, |
10044 | a: __m512h, |
10045 | ) -> __m512h { |
10046 | unsafe { |
10047 | static_assert_uimm_bits!(IMM8, 8); |
10048 | static_assert_sae!(SAE); |
10049 | vrndscaleph_512(a, IMM8, src, k, SAE) |
10050 | } |
10051 | } |
10052 | |
10053 | /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits |
10054 | /// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
10055 | /// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10056 | /// |
10057 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10058 | /// |
10059 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10060 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10061 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10062 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10063 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10064 | /// |
10065 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph) |
10066 | #[inline ] |
10067 | #[target_feature (enable = "avx512fp16" )] |
10068 | #[cfg_attr (test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] |
10069 | #[rustc_legacy_const_generics (2, 3)] |
10070 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10071 | pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>( |
10072 | k: __mmask32, |
10073 | a: __m512h, |
10074 | ) -> __m512h { |
10075 | static_assert_uimm_bits!(IMM8, 8); |
10076 | static_assert_sae!(SAE); |
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10078 | } |
10079 | |
10080 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10081 | /// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements |
10082 | /// from a to the upper elements of dst. |
10083 | /// |
10084 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10085 | /// |
10086 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10087 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10088 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10089 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10090 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10091 | /// |
10092 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh) |
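///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn roundscale_sh_demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(1.25);
///     // Lane 0 of the result should be 2.0 (1.25 rounded up); lanes 1..8 are copied from `a`.
///     _mm_roundscale_sh::<_MM_FROUND_TO_POS_INF>(a, b)
/// }
/// ```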
10093 | #[inline ] |
10094 | #[target_feature (enable = "avx512fp16" )] |
10095 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10096 | #[rustc_legacy_const_generics (2)] |
10097 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10098 | pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h { |
10099 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
10101 | } |
10102 | |
10103 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10104 | /// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied |
10105 | /// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10106 | /// |
10107 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10108 | /// |
10109 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10110 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10111 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10112 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10113 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10114 | /// |
10115 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh) |
10116 | #[inline ] |
10117 | #[target_feature (enable = "avx512fp16" )] |
10118 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10119 | #[rustc_legacy_const_generics (4)] |
10120 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10121 | pub fn _mm_mask_roundscale_sh<const IMM8: i32>( |
10122 | src: __m128h, |
10123 | k: __mmask8, |
10124 | a: __m128h, |
10125 | b: __m128h, |
10126 | ) -> __m128h { |
10127 | static_assert_uimm_bits!(IMM8, 8); |
10128 | _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10129 | } |
10130 | |
10131 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10132 | /// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed |
10133 | /// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10134 | /// |
10135 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10136 | /// |
10137 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10138 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10139 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10140 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10141 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10142 | /// |
10143 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh) |
10144 | #[inline ] |
10145 | #[target_feature (enable = "avx512fp16" )] |
10146 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0))] |
10147 | #[rustc_legacy_const_generics (3)] |
10148 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10149 | pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10150 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_roundscale_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
10152 | } |
10153 | |
10154 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10155 | /// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements |
10156 | /// from a to the upper elements of dst. |
10157 | /// |
10158 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10159 | /// |
10160 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10161 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10162 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10163 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10164 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10165 | /// |
10166 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10167 | /// |
10168 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh) |
10169 | #[inline ] |
10170 | #[target_feature (enable = "avx512fp16" )] |
10171 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10172 | #[rustc_legacy_const_generics (2, 3)] |
10173 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10174 | pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
10175 | static_assert_uimm_bits!(IMM8, 8); |
10176 | static_assert_sae!(SAE); |
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
10178 | } |
10179 | |
10180 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10181 | /// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied |
10182 | /// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10183 | /// |
10184 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10185 | /// |
10186 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10187 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10188 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10189 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10190 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10191 | /// |
10192 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10193 | /// |
10194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh) |
10195 | #[inline ] |
10196 | #[target_feature (enable = "avx512fp16" )] |
10197 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10198 | #[rustc_legacy_const_generics (4, 5)] |
10199 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10200 | pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>( |
10201 | src: __m128h, |
10202 | k: __mmask8, |
10203 | a: __m128h, |
10204 | b: __m128h, |
10205 | ) -> __m128h { |
10206 | unsafe { |
10207 | static_assert_uimm_bits!(IMM8, 8); |
10208 | static_assert_sae!(SAE); |
10209 | vrndscalesh(a, b, src, k, IMM8, SAE) |
10210 | } |
10211 | } |
10212 | |
10213 | /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits |
10214 | /// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed |
10215 | /// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst. |
10216 | /// |
10217 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10218 | /// |
10219 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10220 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10221 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10222 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10223 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10224 | /// |
10225 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter |
10226 | /// |
10227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh) |
10228 | #[inline ] |
10229 | #[target_feature (enable = "avx512fp16" )] |
10230 | #[cfg_attr (test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] |
10231 | #[rustc_legacy_const_generics (3, 4)] |
10232 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10233 | pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>( |
10234 | k: __mmask8, |
10235 | a: __m128h, |
10236 | b: __m128h, |
10237 | ) -> __m128h { |
10238 | static_assert_uimm_bits!(IMM8, 8); |
10239 | static_assert_sae!(SAE); |
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
10241 | } |
10242 | |
10243 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10244 | /// the results in dst. |
10245 | /// |
10246 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph) |
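///
/// Scalef multiplies each lane of a by 2 raised to the floor of the corresponding lane of b. A
/// minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions as
/// the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn scalef_demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane of the result should hold 12.0 (3.0 * 2^2).
///     _mm_scalef_ph(a, b)
/// }
/// ```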
10247 | #[inline ] |
10248 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10249 | #[cfg_attr (test, assert_instr(vscalefph))] |
10250 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10251 | pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
10253 | } |
10254 | |
10255 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10256 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10257 | /// |
10258 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph) |
10259 | #[inline ] |
10260 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10261 | #[cfg_attr (test, assert_instr(vscalefph))] |
10262 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10263 | pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10264 | unsafe { vscalefph_128(a, b, src, k) } |
10265 | } |
10266 | |
10267 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10268 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10269 | /// |
10270 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph) |
10271 | #[inline ] |
10272 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10273 | #[cfg_attr (test, assert_instr(vscalefph))] |
10274 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10275 | pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
10277 | } |
10278 | |
10279 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10280 | /// the results in dst. |
10281 | /// |
10282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph) |
10283 | #[inline ] |
10284 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10285 | #[cfg_attr (test, assert_instr(vscalefph))] |
10286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10287 | pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
10289 | } |
10290 | |
10291 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10292 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10293 | /// |
10294 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph) |
10295 | #[inline ] |
10296 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10297 | #[cfg_attr (test, assert_instr(vscalefph))] |
10298 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10299 | pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
10300 | unsafe { vscalefph_256(a, b, src, k) } |
10301 | } |
10302 | |
10303 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10304 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10305 | /// |
10306 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph) |
10307 | #[inline ] |
10308 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10309 | #[cfg_attr (test, assert_instr(vscalefph))] |
10310 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10311 | pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
    _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
10313 | } |
10314 | |
10315 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10316 | /// the results in dst. |
10317 | /// |
10318 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph) |
10319 | #[inline ] |
10320 | #[target_feature (enable = "avx512fp16" )] |
10321 | #[cfg_attr (test, assert_instr(vscalefph))] |
10322 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10323 | pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
10325 | } |
10326 | |
10327 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10328 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10329 | /// |
10330 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph) |
10331 | #[inline ] |
10332 | #[target_feature (enable = "avx512fp16" )] |
10333 | #[cfg_attr (test, assert_instr(vscalefph))] |
10334 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10335 | pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
10336 | _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10337 | } |
10338 | |
10339 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10340 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10341 | /// |
10342 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph) |
10343 | #[inline ] |
10344 | #[target_feature (enable = "avx512fp16" )] |
10345 | #[cfg_attr (test, assert_instr(vscalefph))] |
10346 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10347 | pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
    _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
10349 | } |
10350 | |
10351 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10352 | /// the results in dst. |
10353 | /// |
10354 | /// Rounding is done according to the rounding parameter, which can be one of: |
10355 | /// |
10356 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10357 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10358 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10359 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10360 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10361 | /// |
10362 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph) |
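///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above, with an explicit rounding mode:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn scalef_round_demo() -> __m512h {
///     let a = _mm512_set1_ph(3.0);
///     let b = _mm512_set1_ph(-1.0);
///     // Every lane of the result should hold 1.5 (3.0 * 2^-1).
///     _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```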
10363 | #[inline ] |
10364 | #[target_feature (enable = "avx512fp16" )] |
10365 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10366 | #[rustc_legacy_const_generics (2)] |
10367 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10368 | pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h { |
10369 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
10371 | } |
10372 | |
10373 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10374 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
10375 | /// |
10376 | /// Rounding is done according to the rounding parameter, which can be one of: |
10377 | /// |
10378 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10379 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10380 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10381 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10382 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10383 | /// |
10384 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph) |
10385 | #[inline ] |
10386 | #[target_feature (enable = "avx512fp16" )] |
10387 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10388 | #[rustc_legacy_const_generics (4)] |
10389 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10390 | pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>( |
10391 | src: __m512h, |
10392 | k: __mmask32, |
10393 | a: __m512h, |
10394 | b: __m512h, |
10395 | ) -> __m512h { |
10396 | unsafe { |
10397 | static_assert_rounding!(ROUNDING); |
10398 | vscalefph_512(a, b, src, k, ROUNDING) |
10399 | } |
10400 | } |
10401 | |
10402 | /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store |
10403 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
10404 | /// |
10405 | /// Rounding is done according to the rounding parameter, which can be one of: |
10406 | /// |
10407 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10408 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10409 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10410 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10411 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10412 | /// |
10413 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph) |
10414 | #[inline ] |
10415 | #[target_feature (enable = "avx512fp16" )] |
10416 | #[cfg_attr (test, assert_instr(vscalefph, ROUNDING = 8))] |
10417 | #[rustc_legacy_const_generics (3)] |
10418 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10419 | pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>( |
10420 | k: __mmask32, |
10421 | a: __m512h, |
10422 | b: __m512h, |
10423 | ) -> __m512h { |
10424 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
10426 | } |
10427 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10429 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
10430 | /// elements of dst. |
10431 | /// |
10432 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh) |
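///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn scalef_sh_demo() -> __m128h {
///     let a = _mm_set1_ph(3.0);
///     let b = _mm_set1_ph(2.0);
///     // Lane 0 of the result should be 12.0 (3.0 * 2^2); lanes 1..8 are copied from `a` (3.0).
///     _mm_scalef_sh(a, b)
/// }
/// ```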
10433 | #[inline ] |
10434 | #[target_feature (enable = "avx512fp16" )] |
10435 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10436 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10437 | pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_sh(_mm_undefined_ph(), 0xff, a, b)
10439 | } |
10440 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10442 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
10443 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10444 | /// |
10445 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh) |
10446 | #[inline ] |
10447 | #[target_feature (enable = "avx512fp16" )] |
10448 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10449 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10450 | pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10451 | _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10452 | } |
10453 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10455 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
10456 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10457 | /// |
10458 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh) |
10459 | #[inline ] |
10460 | #[target_feature (enable = "avx512fp16" )] |
10461 | #[cfg_attr (test, assert_instr(vscalefsh))] |
10462 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10463 | pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
    _mm_mask_scalef_sh(_mm_setzero_ph(), k, a, b)
10465 | } |
10466 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10468 | /// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper |
10469 | /// elements of dst. |
10470 | /// |
10471 | /// Rounding is done according to the rounding parameter, which can be one of: |
10472 | /// |
10473 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10474 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10475 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10476 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10477 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10478 | /// |
10479 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh) |
10480 | #[inline ] |
10481 | #[target_feature (enable = "avx512fp16" )] |
10482 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10483 | #[rustc_legacy_const_generics (2)] |
10484 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10485 | pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h { |
10486 | static_assert_rounding!(ROUNDING); |
    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
10488 | } |
10489 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10491 | /// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), |
10492 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10493 | /// |
10494 | /// Rounding is done according to the rounding parameter, which can be one of: |
10495 | /// |
10496 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10497 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10498 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10499 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10500 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10501 | /// |
10502 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh) |
10503 | #[inline ] |
10504 | #[target_feature (enable = "avx512fp16" )] |
10505 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10506 | #[rustc_legacy_const_generics (4)] |
10507 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10508 | pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>( |
10509 | src: __m128h, |
10510 | k: __mmask8, |
10511 | a: __m128h, |
10512 | b: __m128h, |
10513 | ) -> __m128h { |
10514 | unsafe { |
10515 | static_assert_rounding!(ROUNDING); |
10516 | vscalefsh(a, b, src, k, ROUNDING) |
10517 | } |
10518 | } |
10519 | |
/// Scale the lower half-precision (16-bit) floating-point element in a using the lower element of b, store
10521 | /// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), |
10522 | /// and copy the upper 7 packed elements from a to the upper elements of dst. |
10523 | /// |
10524 | /// Rounding is done according to the rounding parameter, which can be one of: |
10525 | /// |
10526 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
10527 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
10528 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
10529 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
10530 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10531 | /// |
10532 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh) |
10533 | #[inline ] |
10534 | #[target_feature (enable = "avx512fp16" )] |
10535 | #[cfg_attr (test, assert_instr(vscalefsh, ROUNDING = 8))] |
10536 | #[rustc_legacy_const_generics (3)] |
10537 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10538 | pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>( |
10539 | k: __mmask8, |
10540 | a: __m128h, |
10541 | b: __m128h, |
10542 | ) -> __m128h { |
10543 | static_assert_rounding!(ROUNDING); |
    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
10545 | } |
10546 | |
10547 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10548 | /// number of bits specified by imm8, and store the results in dst. |
10549 | /// |
10550 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10551 | /// |
10552 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10553 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10554 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10555 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10556 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10557 | /// |
10558 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph) |
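///
/// The reduced argument is a minus a rounded to the requested precision. A minimal usage sketch
/// (not a compiled doc-test) under the same nightly/hardware assumptions as the examples above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn reduce_demo() -> __m128h {
///     let a = _mm_set1_ph(1.25);
///     // With zero fraction bits and round-to-nearest, 1.25 rounds to 1.0,
///     // so every lane of the result should hold 0.25.
///     _mm_reduce_ph::<_MM_FROUND_TO_NEAREST_INT>(a)
/// }
/// ```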
10559 | #[inline ] |
10560 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10561 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10562 | #[rustc_legacy_const_generics (1)] |
10563 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10564 | pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h { |
10565 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
10567 | } |
10568 | |
10569 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10570 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10571 | /// from src when the corresponding mask bit is not set). |
10572 | /// |
10573 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10574 | /// |
10575 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10576 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10577 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10578 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10579 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10580 | /// |
10581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph) |
10582 | #[inline ] |
10583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10584 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10585 | #[rustc_legacy_const_generics (3)] |
10586 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10587 | pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { |
10588 | unsafe { |
10589 | static_assert_uimm_bits!(IMM8, 8); |
10590 | vreduceph_128(a, IMM8, src, k) |
10591 | } |
10592 | } |
10593 | |
10594 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10595 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10596 | /// out when the corresponding mask bit is not set). |
10597 | /// |
10598 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10599 | /// |
10600 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10601 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10602 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10603 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10604 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10605 | /// |
10606 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph) |
10607 | #[inline ] |
10608 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10609 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10610 | #[rustc_legacy_const_generics (2)] |
10611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10612 | pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h { |
10613 | static_assert_uimm_bits!(IMM8, 8); |
    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
10615 | } |
10616 | |
10617 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10618 | /// number of bits specified by imm8, and store the results in dst. |
10619 | /// |
10620 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10621 | /// |
10622 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10623 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10624 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10625 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10626 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10627 | /// |
10628 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph) |
10629 | #[inline ] |
10630 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10631 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10632 | #[rustc_legacy_const_generics (1)] |
10633 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10634 | pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h { |
10635 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
10637 | } |
10638 | |
10639 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10640 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10641 | /// from src when the corresponding mask bit is not set). |
10642 | /// |
10643 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10644 | /// |
10645 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10646 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10647 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10648 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10649 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10650 | /// |
10651 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph) |
10652 | #[inline ] |
10653 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10654 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10655 | #[rustc_legacy_const_generics (3)] |
10656 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10657 | pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { |
10658 | unsafe { |
10659 | static_assert_uimm_bits!(IMM8, 8); |
10660 | vreduceph_256(a, IMM8, src, k) |
10661 | } |
10662 | } |
10663 | |
10664 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10665 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10666 | /// out when the corresponding mask bit is not set). |
10667 | /// |
10668 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10669 | /// |
10670 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10671 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10672 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10673 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10674 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10675 | /// |
10676 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph) |
10677 | #[inline ] |
10678 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
10679 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10680 | #[rustc_legacy_const_generics (2)] |
10681 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10682 | pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h { |
10683 | static_assert_uimm_bits!(IMM8, 8); |
    _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
10685 | } |
10686 | |
10687 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10688 | /// number of bits specified by imm8, and store the results in dst. |
10689 | /// |
10690 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10691 | /// |
10692 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10693 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10694 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10695 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10696 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10697 | /// |
10698 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph) |
10699 | #[inline ] |
10700 | #[target_feature (enable = "avx512fp16" )] |
10701 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10702 | #[rustc_legacy_const_generics (1)] |
10703 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10704 | pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h { |
10705 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
10707 | } |
10708 | |
10709 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10710 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10711 | /// from src when the corresponding mask bit is not set). |
10712 | /// |
10713 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10714 | /// |
10715 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10716 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10717 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10718 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10719 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10720 | /// |
10721 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph) |
10722 | #[inline ] |
10723 | #[target_feature (enable = "avx512fp16" )] |
10724 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10725 | #[rustc_legacy_const_generics (3)] |
10726 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10727 | pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { |
10728 | static_assert_uimm_bits!(IMM8, 8); |
10729 | _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a) |
10730 | } |
10731 | |
10732 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10733 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10734 | /// out when the corresponding mask bit is not set). |
10735 | /// |
10736 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10737 | /// |
10738 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10739 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10740 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10741 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10742 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10743 | /// |
10744 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph) |
10745 | #[inline ] |
10746 | #[target_feature (enable = "avx512fp16" )] |
10747 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0))] |
10748 | #[rustc_legacy_const_generics (2)] |
10749 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10750 | pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h { |
10751 | static_assert_uimm_bits!(IMM8, 8); |
    _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10753 | } |
10754 | |
10755 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10756 | /// number of bits specified by imm8, and store the results in dst. |
10757 | /// |
10758 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10759 | /// |
10760 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10761 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10762 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10763 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10764 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10765 | /// |
10766 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10767 | /// |
10768 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph) |
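///
/// A minimal usage sketch (not a compiled doc-test) under the same nightly/hardware assumptions
/// as the examples above, combining the reduction with exception suppression:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn reduce_round_demo() -> __m512h {
///     let a = _mm512_set1_ph(-2.75);
///     // Truncating -2.75 toward zero gives -2.0, so every lane of the result
///     // should hold -0.75.
///     _mm512_reduce_round_ph::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a)
/// }
/// ```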
10769 | #[inline ] |
10770 | #[target_feature (enable = "avx512fp16" )] |
10771 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10772 | #[rustc_legacy_const_generics (1, 2)] |
10773 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10774 | pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h { |
10775 | static_assert_uimm_bits!(IMM8, 8); |
10776 | static_assert_sae!(SAE); |
10777 | _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a) |
10778 | } |
10779 | |
10780 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10781 | /// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied |
10782 | /// from src when the corresponding mask bit is not set). |
10783 | /// |
10784 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10785 | /// |
10786 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10787 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10788 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10789 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10790 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10791 | /// |
10792 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10793 | /// |
10794 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph) |
10795 | #[inline ] |
10796 | #[target_feature (enable = "avx512fp16" )] |
10797 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10798 | #[rustc_legacy_const_generics (3, 4)] |
10799 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10800 | pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>( |
10801 | src: __m512h, |
10802 | k: __mmask32, |
10803 | a: __m512h, |
10804 | ) -> __m512h { |
10805 | unsafe { |
10806 | static_assert_uimm_bits!(IMM8, 8); |
10807 | static_assert_sae!(SAE); |
10808 | vreduceph_512(a, IMM8, src, k, SAE) |
10809 | } |
10810 | } |
10811 | |
10812 | /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the |
10813 | /// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed |
10814 | /// out when the corresponding mask bit is not set). |
10815 | /// |
10816 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10817 | /// |
10818 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10819 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10820 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10821 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10822 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10823 | /// |
10824 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10825 | /// |
10826 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph) |
10827 | #[inline ] |
10828 | #[target_feature (enable = "avx512fp16" )] |
10829 | #[cfg_attr (test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] |
10830 | #[rustc_legacy_const_generics (2, 3)] |
10831 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10832 | pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>( |
10833 | k: __mmask32, |
10834 | a: __m512h, |
10835 | ) -> __m512h { |
10836 | static_assert_uimm_bits!(IMM8, 8); |
10837 | static_assert_sae!(SAE); |
10838 | _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a) |
10839 | } |
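
// Illustrative sketch (not part of the crate): in the reduce family, IMM8 packs both
// the number of fixed-point fraction bits to keep (imm8[7:4]) and the rounding mode
// (imm8[1:0]), while the separate SAE const only controls exception suppression. The
// helper name and constant choices below are assumptions made for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn fractional_parts(a: __m512h) -> __m512h {
//     // M = 0 fraction bits, round to nearest, exceptions suppressed:
//     // every lane becomes `a - round_to_nearest(a)`.
//     _mm512_reduce_round_ph::<{ _MM_FROUND_TO_NEAREST_INT }, _MM_FROUND_NO_EXC>(a)
// }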
10840 | |
10841 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10842 | /// the number of bits specified by imm8, store the result in the lower element of dst, and copy the |
10843 | /// upper 7 packed elements from a to the upper elements of dst. |
10844 | /// |
10845 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10846 | /// |
10847 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10848 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10849 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10850 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10851 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10852 | /// |
10853 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh) |
10854 | #[inline ] |
10855 | #[target_feature (enable = "avx512fp16" )] |
10856 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10857 | #[rustc_legacy_const_generics (2)] |
10858 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10859 | pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h { |
10860 | static_assert_uimm_bits!(IMM8, 8); |
10861 | _mm_mask_reduce_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b) |
10862 | } |
10863 | |
10864 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10865 | /// the number of bits specified by imm8, store the result in the lower element of dst using writemask k |
10866 | /// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from |
10867 | /// a to the upper elements of dst. |
10868 | /// |
10869 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10870 | /// |
10871 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10872 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10873 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10874 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10875 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10876 | /// |
10877 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh) |
10878 | #[inline ] |
10879 | #[target_feature (enable = "avx512fp16" )] |
10880 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10881 | #[rustc_legacy_const_generics (4)] |
10882 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10883 | pub fn _mm_mask_reduce_sh<const IMM8: i32>( |
10884 | src: __m128h, |
10885 | k: __mmask8, |
10886 | a: __m128h, |
10887 | b: __m128h, |
10888 | ) -> __m128h { |
10889 | static_assert_uimm_bits!(IMM8, 8); |
10890 | _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b) |
10891 | } |
10892 | |
10893 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10894 | /// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k |
10895 | /// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10896 | /// to the upper elements of dst. |
10897 | /// |
10898 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10899 | /// |
10900 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10901 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10902 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10903 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10904 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10905 | /// |
10906 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh) |
10907 | #[inline ] |
10908 | #[target_feature (enable = "avx512fp16" )] |
10909 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0))] |
10910 | #[rustc_legacy_const_generics (3)] |
10911 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10912 | pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
10913 | static_assert_uimm_bits!(IMM8, 8); |
10914 | _mm_mask_reduce_sh::<IMM8>(_mm_setzero_ph(), k, a, b) |
10915 | } |
10916 | |
10917 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10918 | /// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper |
10919 | /// 7 packed elements from a to the upper elements of dst. |
10920 | /// |
10921 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10922 | /// |
10923 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10924 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10925 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10926 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10927 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10928 | /// |
10929 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10930 | /// |
10931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh) |
10932 | #[inline ] |
10933 | #[target_feature (enable = "avx512fp16" )] |
10934 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10935 | #[rustc_legacy_const_generics (2, 3)] |
10936 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10937 | pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h { |
10938 | static_assert_uimm_bits!(IMM8, 8); |
10939 | static_assert_sae!(SAE); |
10940 | _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b) |
10941 | } |
10942 | |
10943 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10944 | /// the number of bits specified by imm8, store the result in the lower element of dst using writemask k |
10945 | /// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10946 | /// to the upper elements of dst. |
10947 | /// |
10948 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10949 | /// |
10950 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10951 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10952 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10953 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10954 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10955 | /// |
10956 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10957 | /// |
10958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh) |
10959 | #[inline ] |
10960 | #[target_feature (enable = "avx512fp16" )] |
10961 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10962 | #[rustc_legacy_const_generics (4, 5)] |
10963 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10964 | pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>( |
10965 | src: __m128h, |
10966 | k: __mmask8, |
10967 | a: __m128h, |
10968 | b: __m128h, |
10969 | ) -> __m128h { |
10970 | unsafe { |
10971 | static_assert_uimm_bits!(IMM8, 8); |
10972 | static_assert_sae!(SAE); |
10973 | vreducesh(a, b, src, k, IMM8, SAE) |
10974 | } |
10975 | } |
10976 | |
10977 | /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by |
10978 | /// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k |
10979 | /// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a |
10980 | /// to the upper elements of dst. |
10981 | /// |
10982 | /// Rounding is done according to the imm8 parameter, which can be one of: |
10983 | /// |
10984 | /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
10985 | /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
10986 | /// * [`_MM_FROUND_TO_POS_INF`] : round up |
10987 | /// * [`_MM_FROUND_TO_ZERO`] : truncate |
10988 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
10989 | /// |
10990 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
10991 | /// |
10992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh) |
10993 | #[inline ] |
10994 | #[target_feature (enable = "avx512fp16" )] |
10995 | #[cfg_attr (test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] |
10996 | #[rustc_legacy_const_generics (3, 4)] |
10997 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
10998 | pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>( |
10999 | k: __mmask8, |
11000 | a: __m128h, |
11001 | b: __m128h, |
11002 | ) -> __m128h { |
11003 | static_assert_uimm_bits!(IMM8, 8); |
11004 | static_assert_sae!(SAE); |
11005 | _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b) |
11006 | } |
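
// Illustrative sketch (not part of the crate): the `_sh` forms only compute lane 0;
// lanes 1..=7 are always copied from `a`, and with a writemask lane 0 falls back to
// `src` when bit 0 of `k` is clear. The helper name is an assumption for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn reduce_lane0_or_keep(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
//     _mm_mask_reduce_round_sh::<0, _MM_FROUND_NO_EXC>(src, k, a, b)
// }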
11007 | |
11008 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11009 | /// sum of all elements in a. |
11010 | /// |
11011 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph) |
11012 | #[inline ] |
11013 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11014 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11015 | pub fn _mm_reduce_add_ph(a: __m128h) -> f16 { |
11016 | unsafe { |
11017 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11018 | let a: __m128h = _mm_add_ph(a, b); |
11019 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11020 | let a: __m128h = _mm_add_ph(a, b); |
11021 | simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1) |
11022 | } |
11023 | } |
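
// Illustrative sketch (not part of the crate): the horizontal reductions below are
// shuffle/add trees rather than sequential loops, so the result may differ slightly
// from a strict left-to-right f16 summation. The helper name is an assumption for
// the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn horizontal_sum(v: __m128h) -> f16 {
//     _mm_reduce_add_ph(v)
// }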
11024 | |
11025 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11026 | /// sum of all elements in a. |
11027 | /// |
11028 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph) |
11029 | #[inline ] |
11030 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11031 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11032 | pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 { |
11033 | unsafe { |
11034 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11035 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11036 | _mm_reduce_add_ph(_mm_add_ph(p, q)) |
11037 | } |
11038 | } |
11039 | |
11040 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the |
11041 | /// sum of all elements in a. |
11042 | /// |
11043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph) |
11044 | #[inline ] |
11045 | #[target_feature (enable = "avx512fp16" )] |
11046 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11047 | pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 { |
11048 | unsafe { |
11049 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11050 | let q: __m256h = simd_shuffle!( |
11051 | a, |
11052 | a, |
11053 | [ |
11054 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11055 | ] |
11056 | ); |
11057 | _mm256_reduce_add_ph(_mm256_add_ph(p, q)) |
11058 | } |
11059 | } |
11060 | |
11061 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11062 | /// the product of all elements in a. |
11063 | /// |
11064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph) |
11065 | #[inline ] |
11066 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11067 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11068 | pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 { |
11069 | unsafe { |
11070 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11071 | let a: __m128h = _mm_mul_ph(a, b); |
11072 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11073 | let a: __m128h = _mm_mul_ph(a, b); |
11074 | simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1) |
11075 | } |
11076 | } |
11077 | |
11078 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11079 | /// the product of all elements in a. |
11080 | /// |
11081 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph) |
11082 | #[inline ] |
11083 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11084 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11085 | pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { |
11086 | unsafe { |
11087 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11088 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11089 | _mm_reduce_mul_ph(_mm_mul_ph(p, q)) |
11090 | } |
11091 | } |
11092 | |
11093 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns |
11094 | /// the product of all elements in a. |
11095 | /// |
11096 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph) |
11097 | #[inline ] |
11098 | #[target_feature (enable = "avx512fp16" )] |
11099 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11100 | pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 { |
11101 | unsafe { |
11102 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11103 | let q: __m256h = simd_shuffle!( |
11104 | a, |
11105 | a, |
11106 | [ |
11107 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11108 | ] |
11109 | ); |
11110 | _mm256_reduce_mul_ph(_mm256_mul_ph(p, q)) |
11111 | } |
11112 | } |
11113 | |
11114 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11115 | /// minimum of all elements in a. |
11116 | /// |
11117 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph) |
11118 | #[inline ] |
11119 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11120 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11121 | pub fn _mm_reduce_min_ph(a: __m128h) -> f16 { |
11122 | unsafe { |
11123 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11124 | let a: __m128h = _mm_min_ph(a, b); |
11125 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11126 | let a: __m128h = _mm_min_ph(a, b); |
11127 | let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); |
11128 | simd_extract!(_mm_min_sh(a, b), 0) |
11129 | } |
11130 | } |
11131 | |
11132 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11133 | /// minimum of all elements in a. |
11134 | /// |
11135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph) |
11136 | #[inline ] |
11137 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11138 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11139 | pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 { |
11140 | unsafe { |
11141 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11142 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11143 | _mm_reduce_min_ph(_mm_min_ph(p, q)) |
11144 | } |
11145 | } |
11146 | |
11147 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the |
11148 | /// minimum of all elements in a. |
11149 | /// |
11150 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph) |
11151 | #[inline ] |
11152 | #[target_feature (enable = "avx512fp16" )] |
11153 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11154 | pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 { |
11155 | unsafe { |
11156 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11157 | let q: __m256h = simd_shuffle!( |
11158 | a, |
11159 | a, |
11160 | [ |
11161 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11162 | ] |
11163 | ); |
11164 | _mm256_reduce_min_ph(_mm256_min_ph(p, q)) |
11165 | } |
11166 | } |
11167 | |
11168 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11169 | /// maximum of all elements in a. |
11170 | /// |
11171 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph) |
11172 | #[inline ] |
11173 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11174 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11175 | pub fn _mm_reduce_max_ph(a: __m128h) -> f16 { |
11176 | unsafe { |
11177 | let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); |
11178 | let a: __m128h = _mm_max_ph(a, b); |
11179 | let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); |
11180 | let a: __m128h = _mm_max_ph(a, b); |
11181 | let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); |
11182 | simd_extract!(_mm_max_sh(a, b), 0) |
11183 | } |
11184 | } |
11185 | |
11186 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11187 | /// maximum of all elements in a. |
11188 | /// |
11189 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph) |
11190 | #[inline ] |
11191 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11192 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11193 | pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 { |
11194 | unsafe { |
11195 | let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
11196 | let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); |
11197 | _mm_reduce_max_ph(_mm_max_ph(p, q)) |
11198 | } |
11199 | } |
11200 | |
11201 | /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the |
11202 | /// maximum of all elements in a. |
11203 | /// |
11204 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph) |
11205 | #[inline ] |
11206 | #[target_feature (enable = "avx512fp16" )] |
11207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11208 | pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 { |
11209 | unsafe { |
11210 | let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); |
11211 | let q: __m256h = simd_shuffle!( |
11212 | a, |
11213 | a, |
11214 | [ |
11215 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 |
11216 | ] |
11217 | ); |
11218 | _mm256_reduce_max_ph(_mm256_max_ph(p, q)) |
11219 | } |
11220 | } |
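
// Illustrative sketch (not part of the crate): a full horizontal maximum over all 32
// lanes of a 512-bit vector. The helper name is an assumption for the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn max_of_32(v: __m512h) -> f16 {
//     _mm512_reduce_max_ph(v)
// }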
11221 | |
11222 | macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics |
11223 | ($mask_type: ty, $reg: ident, $a: expr) => {{ |
11224 | let dst: $mask_type; |
11225 | asm!( |
11226 | "vfpclassph {k}, {src}, {imm8}" , |
11227 | k = lateout(kreg) dst, |
11228 | src = in($reg) $a, |
11229 | imm8 = const IMM8, |
11230 | options(pure, nomem, nostack) |
11231 | ); |
11232 | dst |
11233 | }}; |
11234 | ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{ |
11235 | let dst: $mask_type; |
11236 | asm!( |
11237 | "vfpclassph {k} {{ {mask} }}, {src}, {imm8}" , |
11238 | k = lateout(kreg) dst, |
11239 | mask = in(kreg) $mask, |
11240 | src = in($reg) $a, |
11241 | imm8 = const IMM8, |
11242 | options(pure, nomem, nostack) |
11243 | ); |
11244 | dst |
11245 | }}; |
11246 | } |
11247 | |
11248 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11249 | /// by imm8, and store the results in mask vector k. |
11250 | /// imm can be a combination of: |
11251 | /// |
11252 | /// 0x01 // QNaN |
11253 | /// 0x02 // Positive Zero |
11254 | /// 0x04 // Negative Zero |
11255 | /// 0x08 // Positive Infinity |
11256 | /// 0x10 // Negative Infinity |
11257 | /// 0x20 // Denormal |
11258 | /// 0x40 // Negative |
11259 | /// 0x80 // SNaN |
11260 | /// |
11261 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask) |
11262 | #[inline ] |
11263 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11264 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11265 | #[rustc_legacy_const_generics (1)] |
11266 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11267 | pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 { |
11268 | unsafe { |
11269 | static_assert_uimm_bits!(IMM8, 8); |
11270 | fpclass_asm!(__mmask8, xmm_reg, a) |
11271 | } |
11272 | } |
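
// Illustrative sketch (not part of the crate): because the category bits can be OR'ed
// together, `0x01 | 0x80` tests each lane for any NaN, quiet or signaling. The helper
// name is an assumption for the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn nan_lanes(a: __m128h) -> __mmask8 {
//     _mm_fpclass_ph_mask::<{ 0x01 | 0x80 }>(a)
// }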
11273 | |
11274 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11275 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11276 | /// corresponding mask bit is not set). |
11277 | /// imm can be a combination of: |
11278 | /// |
11279 | /// 0x01 // QNaN |
11280 | /// 0x02 // Positive Zero |
11281 | /// 0x04 // Negative Zero |
11282 | /// 0x08 // Positive Infinity |
11283 | /// 0x10 // Negative Infinity |
11284 | /// 0x20 // Denormal |
11285 | /// 0x40 // Negative |
11286 | /// 0x80 // SNaN |
11287 | /// |
11288 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask) |
11289 | #[inline ] |
11290 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11291 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11292 | #[rustc_legacy_const_generics (2)] |
11293 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11294 | pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 { |
11295 | unsafe { |
11296 | static_assert_uimm_bits!(IMM8, 8); |
11297 | fpclass_asm!(__mmask8, k1, xmm_reg, a) |
11298 | } |
11299 | } |
11300 | |
11301 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11302 | /// by imm8, and store the results in mask vector k. |
11303 | /// imm can be a combination of: |
11304 | /// |
11305 | /// 0x01 // QNaN |
11306 | /// 0x02 // Positive Zero |
11307 | /// 0x04 // Negative Zero |
11308 | /// 0x08 // Positive Infinity |
11309 | /// 0x10 // Negative Infinity |
11310 | /// 0x20 // Denormal |
11311 | /// 0x40 // Negative |
11312 | /// 0x80 // SNaN |
11313 | /// |
11314 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask) |
11315 | #[inline ] |
11316 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11317 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11318 | #[rustc_legacy_const_generics (1)] |
11319 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11320 | pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 { |
11321 | unsafe { |
11322 | static_assert_uimm_bits!(IMM8, 8); |
11323 | fpclass_asm!(__mmask16, ymm_reg, a) |
11324 | } |
11325 | } |
11326 | |
11327 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11328 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11329 | /// corresponding mask bit is not set). |
11330 | /// imm can be a combination of: |
11331 | /// |
11332 | /// 0x01 // QNaN |
11333 | /// 0x02 // Positive Zero |
11334 | /// 0x04 // Negative Zero |
11335 | /// 0x08 // Positive Infinity |
11336 | /// 0x10 // Negative Infinity |
11337 | /// 0x20 // Denormal |
11338 | /// 0x40 // Negative |
11339 | /// 0x80 // SNaN |
11340 | /// |
11341 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask) |
11342 | #[inline ] |
11343 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11344 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11345 | #[rustc_legacy_const_generics (2)] |
11346 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11347 | pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 { |
11348 | unsafe { |
11349 | static_assert_uimm_bits!(IMM8, 8); |
11350 | fpclass_asm!(__mmask16, k1, ymm_reg, a) |
11351 | } |
11352 | } |
11353 | |
11354 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11355 | /// by imm8, and store the results in mask vector k. |
11356 | /// imm can be a combination of: |
11357 | /// |
11358 | /// 0x01 // QNaN |
11359 | /// 0x02 // Positive Zero |
11360 | /// 0x04 // Negative Zero |
11361 | /// 0x08 // Positive Infinity |
11362 | /// 0x10 // Negative Infinity |
11363 | /// 0x20 // Denormal |
11364 | /// 0x40 // Negative |
11365 | /// 0x80 // SNaN |
11366 | /// |
11367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask) |
11368 | #[inline ] |
11369 | #[target_feature (enable = "avx512fp16" )] |
11370 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11371 | #[rustc_legacy_const_generics (1)] |
11372 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11373 | pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 { |
11374 | unsafe { |
11375 | static_assert_uimm_bits!(IMM8, 8); |
11376 | fpclass_asm!(__mmask32, zmm_reg, a) |
11377 | } |
11378 | } |
11379 | |
11380 | /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified |
11381 | /// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the |
11382 | /// corresponding mask bit is not set). |
11383 | /// imm can be a combination of: |
11384 | /// |
11385 | /// 0x01 // QNaN |
11386 | /// 0x02 // Positive Zero |
11387 | /// 0x04 // Negative Zero |
11388 | /// 0x08 // Positive Infinity |
11389 | /// 0x10 // Negative Infinity |
11390 | /// 0x20 // Denormal |
11391 | /// 0x40 // Negative |
11392 | /// 0x80 // SNaN |
11393 | /// |
11394 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask) |
11395 | #[inline ] |
11396 | #[target_feature (enable = "avx512fp16" )] |
11397 | #[cfg_attr (test, assert_instr(vfpclassph, IMM8 = 0))] |
11398 | #[rustc_legacy_const_generics (2)] |
11399 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11400 | pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 { |
11401 | unsafe { |
11402 | static_assert_uimm_bits!(IMM8, 8); |
11403 | fpclass_asm!(__mmask32, k1, zmm_reg, a) |
11404 | } |
11405 | } |
11406 | |
11407 | /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified |
11408 | /// by imm8, and store the result in mask vector k. |
11409 | /// imm can be a combination of: |
11410 | /// |
11411 | /// 0x01 // QNaN |
11412 | /// 0x02 // Positive Zero |
11413 | /// 0x04 // Negative Zero |
11414 | /// 0x08 // Positive Infinity |
11415 | /// 0x10 // Negative Infinity |
11416 | /// 0x20 // Denormal |
11417 | /// 0x40 // Negative |
11418 | /// 0x80 // SNaN |
11419 | /// |
11420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask) |
11421 | #[inline ] |
11422 | #[target_feature (enable = "avx512fp16" )] |
11423 | #[cfg_attr (test, assert_instr(vfpclasssh, IMM8 = 0))] |
11424 | #[rustc_legacy_const_generics (1)] |
11425 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11426 | pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 { |
11427 | _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a) |
11428 | } |
11429 | |
11430 | /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified |
11431 | /// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the |
11432 | /// corresponding mask bit is not set). |
11433 | /// imm can be a combination of: |
11434 | /// |
11435 | /// 0x01 // QNaN |
11436 | /// 0x02 // Positive Zero |
11437 | /// 0x04 // Negative Zero |
11438 | /// 0x08 // Positive Infinity |
11439 | /// 0x10 // Negative Infinity |
11440 | /// 0x20 // Denormal |
11441 | /// 0x40 // Negative |
11442 | /// 0x80 // SNaN |
11443 | /// |
11444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask) |
11445 | #[inline ] |
11446 | #[target_feature (enable = "avx512fp16" )] |
11447 | #[cfg_attr (test, assert_instr(vfpclasssh, IMM8 = 0))] |
11448 | #[rustc_legacy_const_generics (2)] |
11449 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11450 | pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 { |
11451 | unsafe { |
11452 | static_assert_uimm_bits!(IMM8, 8); |
11453 | vfpclasssh(a, IMM8, k1) |
11454 | } |
11455 | } |
11456 | |
11457 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11458 | /// and store the results in dst. |
11459 | /// |
11460 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph) |
11461 | #[inline ] |
11462 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11463 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11464 | pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { |
11465 | unsafe { simd_select_bitmask(k, b, a) } |
11466 | } |
11467 | |
11468 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11469 | /// and store the results in dst. |
11470 | /// |
11471 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph) |
11472 | #[inline ] |
11473 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11474 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11475 | pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { |
11476 | unsafe { simd_select_bitmask(k, b, a) } |
11477 | } |
11478 | |
11479 | /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, |
11480 | /// and store the results in dst. |
11481 | /// |
11482 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph) |
11483 | #[inline ] |
11484 | #[target_feature (enable = "avx512fp16" )] |
11485 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11486 | pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { |
11487 | unsafe { simd_select_bitmask(k, b, a) } |
11488 | } |
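
// Illustrative sketch (not part of the crate): bit i of `k` selects lane i of `b` when
// set and lane i of `a` when clear. The mask value and helper name are assumptions for
// the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn low_half_from_b(a: __m128h, b: __m128h) -> __m128h {
//     _mm_mask_blend_ph(0b0000_1111, a, b)
// }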
11489 | |
11490 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11491 | /// and index in idx, and store the results in dst. |
11492 | /// |
11493 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph) |
11494 | #[inline ] |
11495 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11496 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11497 | pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h { |
11498 | _mm_castsi128_ph(_mm_permutex2var_epi16( |
11499 | _mm_castph_si128(a), |
11500 | idx, |
11501 | _mm_castph_si128(b), |
11502 | )) |
11503 | } |
11504 | |
11505 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11506 | /// and index in idx, and store the results in dst. |
11507 | /// |
11508 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph) |
11509 | #[inline ] |
11510 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11511 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11512 | pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h { |
11513 | _mm256_castsi256_ph(_mm256_permutex2var_epi16( |
11514 | _mm256_castph_si256(a), |
11515 | idx, |
11516 | _mm256_castph_si256(b), |
11517 | )) |
11518 | } |
11519 | |
11520 | /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector |
11521 | /// and index in idx, and store the results in dst. |
11522 | /// |
11523 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph) |
11524 | #[inline ] |
11525 | #[target_feature (enable = "avx512fp16" )] |
11526 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11527 | pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h { |
11528 | _mm512_castsi512_ph(_mm512_permutex2var_epi16( |
11529 | _mm512_castph_si512(a), |
11530 | idx, |
11531 | _mm512_castph_si512(b), |
11532 | )) |
11533 | } |
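
// Illustrative sketch (not part of the crate): each 16-bit index selects a lane from
// the concatenation of `a` (indices 0..=7) and `b` (indices 8..=15) in the 128-bit
// form. The interleaving pattern and helper name are assumptions for the example.
//
// #[target_feature(enable = "avx512fp16,avx512vl")]
// fn interleave_low(a: __m128h, b: __m128h) -> __m128h {
//     let idx = _mm_set_epi16(11, 3, 10, 2, 9, 1, 8, 0);
//     _mm_permutex2var_ph(a, idx, b) // [a0, b0, a1, b1, a2, b2, a3, b3]
// }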
11534 | |
11535 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11536 | /// and store the results in dst. |
11537 | /// |
11538 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph) |
11539 | #[inline ] |
11540 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11541 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11542 | pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { |
11543 | _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a))) |
11544 | } |
11545 | |
11546 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11547 | /// and store the results in dst. |
11548 | /// |
11549 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph) |
11550 | #[inline ] |
11551 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11552 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11553 | pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { |
11554 | _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a))) |
11555 | } |
11556 | |
11557 | /// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx, |
11558 | /// and store the results in dst. |
11559 | /// |
11560 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph) |
11561 | #[inline ] |
11562 | #[target_feature (enable = "avx512fp16" )] |
11563 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11564 | pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { |
11565 | _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a))) |
11566 | } |
11567 | |
11568 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11569 | /// and store the results in dst. |
11570 | /// |
11571 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph) |
11572 | #[inline ] |
11573 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11574 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11575 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11576 | pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { |
11577 | unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) } |
11578 | } |
11579 | |
11580 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11581 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11582 | /// mask bit is not set). |
11583 | /// |
11584 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph) |
11585 | #[inline ] |
11586 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11587 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11588 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11589 | pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11590 | unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) } |
11591 | } |
11592 | |
11593 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11594 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11595 | /// |
11596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph) |
11597 | #[inline ] |
11598 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11599 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11600 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11601 | pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { |
11602 | _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a) |
11603 | } |
11604 | |
11605 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11606 | /// and store the results in dst. |
11607 | /// |
11608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph) |
11609 | #[inline ] |
11610 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11611 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11612 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11613 | pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { |
11614 | unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) } |
11615 | } |
11616 | |
11617 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11618 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11619 | /// mask bit is not set). |
11620 | /// |
11621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph) |
11622 | #[inline ] |
11623 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11624 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11626 | pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { |
11627 | unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) } |
11628 | } |
11629 | |
11630 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11631 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11632 | /// |
11633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph) |
11634 | #[inline ] |
11635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11636 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11638 | pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { |
11639 | _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a) |
11640 | } |
11641 | |
11642 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11643 | /// and store the results in dst. |
11644 | /// |
11645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph) |
11646 | #[inline ] |
11647 | #[target_feature (enable = "avx512fp16" )] |
11648 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11650 | pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { |
11651 | unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) } |
11652 | } |
11653 | |
11654 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11655 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11656 | /// mask bit is not set). |
11657 | /// |
11658 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph) |
11659 | #[inline ] |
11660 | #[target_feature (enable = "avx512fp16" )] |
11661 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11662 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11663 | pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { |
11664 | unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) } |
11665 | } |
11666 | |
11667 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11668 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11669 | /// |
11670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph) |
11671 | #[inline ] |
11672 | #[target_feature (enable = "avx512fp16" )] |
11673 | #[cfg_attr (test, assert_instr(vcvtw2ph))] |
11674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11675 | pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { |
11676 | _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a) |
11677 | } |
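
// Illustrative sketch (not part of the crate): convert 32 signed 16-bit integers to
// f16 lanes, zeroing every odd lane. Values with magnitude above 2048 may round,
// since f16 carries an 11-bit significand. The mask and helper name are assumptions
// for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn even_lanes_to_f16(a: __m512i) -> __m512h {
//     _mm512_maskz_cvtepi16_ph(0x5555_5555, a)
// }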
11678 | |
11679 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11680 | /// and store the results in dst. |
11681 | /// |
11682 | /// Rounding is done according to the rounding parameter, which can be one of: |
11683 | /// |
11684 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11685 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11686 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11687 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11688 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11689 | /// |
11690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph) |
11691 | #[inline ] |
11692 | #[target_feature (enable = "avx512fp16" )] |
11693 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11694 | #[rustc_legacy_const_generics (1)] |
11695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11696 | pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h { |
11697 | unsafe { |
11698 | static_assert_rounding!(ROUNDING); |
11699 | vcvtw2ph_512(a.as_i16x32(), ROUNDING) |
11700 | } |
11701 | } |
11702 | |
11703 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11704 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11705 | /// mask bit is not set). |
11706 | /// |
11707 | /// Rounding is done according to the rounding parameter, which can be one of: |
11708 | /// |
11709 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11710 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11711 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11712 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11713 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11714 | /// |
11715 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph) |
11716 | #[inline ] |
11717 | #[target_feature (enable = "avx512fp16" )] |
11718 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11719 | #[rustc_legacy_const_generics (3)] |
11720 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11721 | pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>( |
11722 | src: __m512h, |
11723 | k: __mmask32, |
11724 | a: __m512i, |
11725 | ) -> __m512h { |
11726 | unsafe { |
11727 | static_assert_rounding!(ROUNDING); |
11728 | simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::<ROUNDING>(a), src) |
11729 | } |
11730 | } |
11731 | |
11732 | /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11733 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11734 | /// |
11735 | /// Rounding is done according to the rounding parameter, which can be one of: |
11736 | /// |
11737 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11738 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11739 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11740 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11741 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11742 | /// |
11743 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph) |
11744 | #[inline ] |
11745 | #[target_feature (enable = "avx512fp16" )] |
11746 | #[cfg_attr (test, assert_instr(vcvtw2ph, ROUNDING = 8))] |
11747 | #[rustc_legacy_const_generics (2)] |
11748 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11749 | pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h { |
11750 | static_assert_rounding!(ROUNDING); |
11751 | _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a) |
11752 | } |
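
// Illustrative sketch (not part of the crate): override MXCSR and truncate toward zero
// during the int16 -> f16 conversion, with exceptions suppressed. The helper name is an
// assumption for the example.
//
// #[target_feature(enable = "avx512fp16")]
// fn to_f16_truncating(a: __m512i) -> __m512h {
//     _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
// }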
11753 | |
11754 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11755 | /// and store the results in dst. |
11756 | /// |
11757 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph) |
11758 | #[inline ] |
11759 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11760 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11761 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11762 | pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { |
11763 | unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) } |
11764 | } |
11765 | |
11766 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11767 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11768 | /// mask bit is not set). |
11769 | /// |
11770 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph) |
11771 | #[inline ] |
11772 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11773 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11774 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11775 | pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11776 | unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) } |
11777 | } |
11778 | |
11779 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11780 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11781 | /// |
11782 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph) |
11783 | #[inline ] |
11784 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11785 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11786 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11787 | pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { |
11788 | _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a) |
11789 | } |
11790 | |
11791 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11792 | /// and store the results in dst. |
11793 | /// |
11794 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph) |
11795 | #[inline ] |
11796 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11797 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11798 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11799 | pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { |
11800 | unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) } |
11801 | } |
11802 | |
11803 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11804 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11805 | /// mask bit is not set). |
11806 | /// |
11807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph) |
11808 | #[inline ] |
11809 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11810 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11812 | pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { |
11813 | unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) } |
11814 | } |
11815 | |
11816 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11817 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11818 | /// |
11819 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph) |
11820 | #[inline ] |
11821 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11822 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11823 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11824 | pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { |
11825 | _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a) |
11826 | } |
11827 | |
11828 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11829 | /// and store the results in dst. |
11830 | /// |
11831 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph) |
11832 | #[inline ] |
11833 | #[target_feature (enable = "avx512fp16" )] |
11834 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11835 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11836 | pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { |
11837 | unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) } |
11838 | } |
11839 | |
11840 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11841 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11842 | /// mask bit is not set). |
11843 | /// |
11844 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph) |
11845 | #[inline ] |
11846 | #[target_feature (enable = "avx512fp16" )] |
11847 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11848 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11849 | pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { |
11850 | unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) } |
11851 | } |
11852 | |
11853 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11854 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11855 | /// |
11856 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph) |
11857 | #[inline ] |
11858 | #[target_feature (enable = "avx512fp16" )] |
11859 | #[cfg_attr (test, assert_instr(vcvtuw2ph))] |
11860 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11861 | pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { |
    _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a)
11863 | } |
11864 | |
11865 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11866 | /// and store the results in dst. |
11867 | /// |
11868 | /// Rounding is done according to the rounding parameter, which can be one of: |
11869 | /// |
11870 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11871 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11872 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11873 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11874 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11875 | /// |
11876 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph) |
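///
/// A hypothetical sketch of how the rounding mode is supplied as a const generic (illustrative
/// only; assumes nightly `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn cvt_round_up(a: __m512i) -> __m512h {
///     // A rounding direction OR'ed with `_MM_FROUND_NO_EXC` selects the behaviour at compile time.
///     _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```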
11877 | #[inline ] |
11878 | #[target_feature (enable = "avx512fp16" )] |
11879 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11880 | #[rustc_legacy_const_generics (1)] |
11881 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11882 | pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h { |
11883 | unsafe { |
11884 | static_assert_rounding!(ROUNDING); |
11885 | vcvtuw2ph_512(a.as_u16x32(), ROUNDING) |
11886 | } |
11887 | } |
11888 | |
11889 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11890 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11891 | /// mask bit is not set). |
11892 | /// |
11893 | /// Rounding is done according to the rounding parameter, which can be one of: |
11894 | /// |
11895 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11896 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11897 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11898 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11899 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11900 | /// |
11901 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph) |
11902 | #[inline ] |
11903 | #[target_feature (enable = "avx512fp16" )] |
11904 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11905 | #[rustc_legacy_const_generics (3)] |
11906 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11907 | pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>( |
11908 | src: __m512h, |
11909 | k: __mmask32, |
11910 | a: __m512i, |
11911 | ) -> __m512h { |
11912 | unsafe { |
11913 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src)
11915 | } |
11916 | } |
11917 | |
11918 | /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11919 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11920 | /// |
11921 | /// Rounding is done according to the rounding parameter, which can be one of: |
11922 | /// |
11923 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
11924 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
11925 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
11926 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
11927 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
11928 | /// |
11929 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph) |
11930 | #[inline ] |
11931 | #[target_feature (enable = "avx512fp16" )] |
11932 | #[cfg_attr (test, assert_instr(vcvtuw2ph, ROUNDING = 8))] |
11933 | #[rustc_legacy_const_generics (2)] |
11934 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11935 | pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h { |
11936 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
11938 | } |
11939 | |
11940 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11941 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
11942 | /// |
11943 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph) |
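///
/// A hypothetical sketch of the lane layout (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn four_low_lanes(a: __m128i) -> __m128h {
///     // The four i32 lanes land in f16 lanes 0..4; lanes 4..8 of the result are 0.0.
///     _mm_cvtepi32_ph(a)
/// }
/// ```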
11944 | #[inline ] |
11945 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11946 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11947 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11948 | pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
11950 | } |
11951 | |
11952 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11953 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11954 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
11955 | /// |
11956 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph) |
11957 | #[inline ] |
11958 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11959 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11960 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11961 | pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
11962 | unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) } |
11963 | } |
11964 | |
11965 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11966 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
11967 | /// The upper 64 bits of dst are zeroed out. |
11968 | /// |
11969 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph) |
11970 | #[inline ] |
11971 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11972 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11973 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11974 | pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
11976 | } |
11977 | |
11978 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11979 | /// and store the results in dst. |
11980 | /// |
11981 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph) |
11982 | #[inline ] |
11983 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11984 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11985 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11986 | pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { |
11987 | unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) } |
11988 | } |
11989 | |
11990 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
11991 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
11992 | /// mask bit is not set). |
11993 | /// |
11994 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph) |
11995 | #[inline ] |
11996 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
11997 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
11998 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
11999 | pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) }
12001 | } |
12002 | |
12003 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12004 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12005 | /// |
12006 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph) |
12007 | #[inline ] |
12008 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12009 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12010 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12011 | pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
12013 | } |
12014 | |
12015 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12016 | /// and store the results in dst. |
12017 | /// |
12018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph) |
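///
/// A hypothetical sketch of the width narrowing (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn narrow_to_256(a: __m512i) -> __m256h {
///     // Sixteen i32 lanes become sixteen f16 lanes, so the result is a 256-bit vector.
///     _mm512_cvtepi32_ph(a)
/// }
/// ```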
12019 | #[inline ] |
12020 | #[target_feature (enable = "avx512fp16" )] |
12021 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12023 | pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { |
12024 | unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) } |
12025 | } |
12026 | |
12027 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12028 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12029 | /// mask bit is not set). |
12030 | /// |
12031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph) |
12032 | #[inline ] |
12033 | #[target_feature (enable = "avx512fp16" )] |
12034 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12035 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12036 | pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) }
12038 | } |
12039 | |
12040 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12041 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12042 | /// |
12043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph) |
12044 | #[inline ] |
12045 | #[target_feature (enable = "avx512fp16" )] |
12046 | #[cfg_attr (test, assert_instr(vcvtdq2ph))] |
12047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12048 | pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { |
    _mm512_mask_cvtepi32_ph(_mm256_setzero_ph(), k, a)
12050 | } |
12051 | |
12052 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12053 | /// and store the results in dst. |
12054 | /// |
12055 | /// Rounding is done according to the rounding parameter, which can be one of: |
12056 | /// |
12057 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12058 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12059 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12060 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12061 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12062 | /// |
12063 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph) |
12064 | #[inline ] |
12065 | #[target_feature (enable = "avx512fp16" )] |
12066 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12067 | #[rustc_legacy_const_generics (1)] |
12068 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12069 | pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h { |
12070 | unsafe { |
12071 | static_assert_rounding!(ROUNDING); |
12072 | vcvtdq2ph_512(a.as_i32x16(), ROUNDING) |
12073 | } |
12074 | } |
12075 | |
12076 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12077 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12078 | /// mask bit is not set). |
12079 | /// |
12080 | /// Rounding is done according to the rounding parameter, which can be one of: |
12081 | /// |
12082 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12083 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12084 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12085 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12086 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12087 | /// |
12088 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph) |
12089 | #[inline ] |
12090 | #[target_feature (enable = "avx512fp16" )] |
12091 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12092 | #[rustc_legacy_const_generics (3)] |
12093 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12094 | pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>( |
12095 | src: __m256h, |
12096 | k: __mmask16, |
12097 | a: __m512i, |
12098 | ) -> __m256h { |
12099 | unsafe { |
12100 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
12102 | } |
12103 | } |
12104 | |
12105 | /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12106 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12107 | /// |
12108 | /// Rounding is done according to the rounding parameter, which can be one of: |
12109 | /// |
12110 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12111 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12112 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12113 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12114 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12115 | /// |
12116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph) |
12117 | #[inline ] |
12118 | #[target_feature (enable = "avx512fp16" )] |
12119 | #[cfg_attr (test, assert_instr(vcvtdq2ph, ROUNDING = 8))] |
12120 | #[rustc_legacy_const_generics (2)] |
12121 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12122 | pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h { |
12123 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12125 | } |
12126 | |
12127 | /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12128 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12129 | /// of dst. |
12130 | /// |
12131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh) |
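///
/// A hypothetical sketch of the scalar insert (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn set_low_lane(a: __m128h) -> __m128h {
///     // Lane 0 becomes 42.0 (converted from the i32); lanes 1..8 are copied from `a`.
///     _mm_cvti32_sh(a, 42)
/// }
/// ```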
12132 | #[inline ] |
12133 | #[target_feature (enable = "avx512fp16" )] |
12134 | #[cfg_attr (test, assert_instr(vcvtsi2sh))] |
12135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12136 | pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h { |
12137 | unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } |
12138 | } |
12139 | |
12140 | /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12141 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12142 | /// of dst. |
12143 | /// |
12144 | /// Rounding is done according to the rounding parameter, which can be one of: |
12145 | /// |
12146 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12147 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12148 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12149 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12150 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12151 | /// |
12152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh) |
12153 | #[inline ] |
12154 | #[target_feature (enable = "avx512fp16" )] |
12155 | #[cfg_attr (test, assert_instr(vcvtsi2sh, ROUNDING = 8))] |
12156 | #[rustc_legacy_const_generics (2)] |
12157 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12158 | pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h { |
12159 | unsafe { |
12160 | static_assert_rounding!(ROUNDING); |
12161 | vcvtsi2sh(a, b, ROUNDING) |
12162 | } |
12163 | } |
12164 | |
12165 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12166 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12167 | /// |
12168 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph) |
12169 | #[inline ] |
12170 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12171 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12173 | pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
12175 | } |
12176 | |
12177 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12178 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12179 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12180 | /// |
12181 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph) |
12182 | #[inline ] |
12183 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12184 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12185 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12186 | pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12187 | unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) } |
12188 | } |
12189 | |
12190 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12191 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12192 | /// The upper 64 bits of dst are zeroed out. |
12193 | /// |
12194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph) |
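///
/// A hypothetical sketch of the zeromask behaviour (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn only_lane_zero(a: __m128i) -> __m128h {
///     // Only mask bit 0 is set, so lane 0 holds the converted value and every other lane is 0.0.
///     _mm_maskz_cvtepu32_ph(0b0000_0001, a)
/// }
/// ```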
12195 | #[inline ] |
12196 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12197 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12199 | pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12201 | } |
12202 | |
12203 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12204 | /// and store the results in dst. |
12205 | /// |
12206 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph) |
12207 | #[inline ] |
12208 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12209 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12210 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12211 | pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { |
12212 | unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) } |
12213 | } |
12214 | |
12215 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12216 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12217 | /// mask bit is not set). |
12218 | /// |
12219 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph) |
12220 | #[inline ] |
12221 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12222 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12223 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12224 | pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) }
12226 | } |
12227 | |
12228 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12229 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12230 | /// |
12231 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph) |
12232 | #[inline ] |
12233 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12234 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12236 | pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12238 | } |
12239 | |
12240 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12241 | /// and store the results in dst. |
12242 | /// |
12243 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph) |
12244 | #[inline ] |
12245 | #[target_feature (enable = "avx512fp16" )] |
12246 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12247 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12248 | pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { |
12249 | unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) } |
12250 | } |
12251 | |
12252 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12253 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12254 | /// mask bit is not set). |
12255 | /// |
12256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph) |
12257 | #[inline ] |
12258 | #[target_feature (enable = "avx512fp16" )] |
12259 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12260 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12261 | pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) }
12263 | } |
12264 | |
12265 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12266 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12267 | /// |
12268 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph) |
12269 | #[inline ] |
12270 | #[target_feature (enable = "avx512fp16" )] |
12271 | #[cfg_attr (test, assert_instr(vcvtudq2ph))] |
12272 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12273 | pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { |
    _mm512_mask_cvtepu32_ph(_mm256_setzero_ph(), k, a)
12275 | } |
12276 | |
12277 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12278 | /// and store the results in dst. |
12279 | /// |
12280 | /// Rounding is done according to the rounding parameter, which can be one of: |
12281 | /// |
12282 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12283 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12284 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12285 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12286 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12287 | /// |
12288 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph) |
12289 | #[inline ] |
12290 | #[target_feature (enable = "avx512fp16" )] |
12291 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12292 | #[rustc_legacy_const_generics (1)] |
12293 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12294 | pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h { |
12295 | unsafe { |
12296 | static_assert_rounding!(ROUNDING); |
12297 | vcvtudq2ph_512(a.as_u32x16(), ROUNDING) |
12298 | } |
12299 | } |
12300 | |
12301 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12302 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12303 | /// mask bit is not set). |
12304 | /// |
12305 | /// Rounding is done according to the rounding parameter, which can be one of: |
12306 | /// |
12307 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12308 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12309 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12310 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12311 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12312 | /// |
12313 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph) |
12314 | #[inline ] |
12315 | #[target_feature (enable = "avx512fp16" )] |
12316 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12317 | #[rustc_legacy_const_generics (3)] |
12318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12319 | pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>( |
12320 | src: __m256h, |
12321 | k: __mmask16, |
12322 | a: __m512i, |
12323 | ) -> __m256h { |
12324 | unsafe { |
12325 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
12327 | } |
12328 | } |
12329 | |
12330 | /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12331 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12332 | /// |
12333 | /// Rounding is done according to the rounding parameter, which can be one of: |
12334 | /// |
12335 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12336 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12337 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12338 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12339 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12340 | /// |
12341 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph) |
12342 | #[inline ] |
12343 | #[target_feature (enable = "avx512fp16" )] |
12344 | #[cfg_attr (test, assert_instr(vcvtudq2ph, ROUNDING = 8))] |
12345 | #[rustc_legacy_const_generics (2)] |
12346 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12347 | pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h { |
12348 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12350 | } |
12351 | |
12352 | /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12353 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12354 | /// of dst. |
12355 | /// |
12356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh) |
12357 | #[inline ] |
12358 | #[target_feature (enable = "avx512fp16" )] |
12359 | #[cfg_attr (test, assert_instr(vcvtusi2sh))] |
12360 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12361 | pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h { |
12362 | unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } |
12363 | } |
12364 | |
12365 | /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the |
12366 | /// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements |
12367 | /// of dst. |
12368 | /// |
12369 | /// Rounding is done according to the rounding parameter, which can be one of: |
12370 | /// |
12371 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12372 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12373 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12374 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12375 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12376 | /// |
12377 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh) |
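///
/// A hypothetical sketch combining the scalar convert with an explicit rounding mode
/// (illustrative only; assumes nightly `stdarch_x86_avx512_f16` plus AVX512-FP16 hardware,
/// hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn truncate_into_low(a: __m128h, b: u32) -> __m128h {
///     // Convert `b` with truncation and suppressed exceptions; the upper lanes come from `a`.
///     _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```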
12378 | #[inline ] |
12379 | #[target_feature (enable = "avx512fp16" )] |
12380 | #[cfg_attr (test, assert_instr(vcvtusi2sh, ROUNDING = 8))] |
12381 | #[rustc_legacy_const_generics (2)] |
12382 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12383 | pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h { |
12384 | unsafe { |
12385 | static_assert_rounding!(ROUNDING); |
12386 | vcvtusi2sh(a, b, ROUNDING) |
12387 | } |
12388 | } |
12389 | |
12390 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12391 | /// and store the results in dst. The upper 96 bits of dst are zeroed out. |
12392 | /// |
12393 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph) |
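///
/// A hypothetical sketch of the lane layout (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn two_low_lanes(a: __m128i) -> __m128h {
///     // The two i64 lanes land in f16 lanes 0 and 1; lanes 2..8 of the result are 0.0.
///     _mm_cvtepi64_ph(a)
/// }
/// ```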
12394 | #[inline ] |
12395 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12396 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12397 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12398 | pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12400 | } |
12401 | |
12402 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12403 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12404 | /// mask bit is not set). The upper 96 bits of dst are zeroed out. |
12405 | /// |
12406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph) |
12407 | #[inline ] |
12408 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12409 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12410 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12411 | pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12412 | unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) } |
12413 | } |
12414 | |
12415 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12416 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12417 | /// The upper 96 bits of dst are zeroed out. |
12418 | /// |
12419 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph) |
12420 | #[inline ] |
12421 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12422 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12423 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12424 | pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12426 | } |
12427 | |
12428 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12429 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12430 | /// |
12431 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph) |
12432 | #[inline ] |
12433 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12434 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12435 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12436 | pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { |
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12438 | } |
12439 | |
12440 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12441 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12442 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12443 | /// |
12444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph) |
12445 | #[inline ] |
12446 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12447 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12448 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12449 | pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12450 | unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) } |
12451 | } |
12452 | |
12453 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12454 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12455 | /// The upper 64 bits of dst are zeroed out. |
12456 | /// |
12457 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph) |
12458 | #[inline ] |
12459 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12460 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12461 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12462 | pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12464 | } |
12465 | |
12466 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12467 | /// and store the results in dst. |
12468 | /// |
12469 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph) |
12470 | #[inline ] |
12471 | #[target_feature (enable = "avx512fp16" )] |
12472 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12473 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12474 | pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h { |
12475 | unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) } |
12476 | } |
12477 | |
12478 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12479 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12480 | /// mask bit is not set). |
12481 | /// |
12482 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph) |
12483 | #[inline ] |
12484 | #[target_feature (enable = "avx512fp16" )] |
12485 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12486 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12487 | pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) }
12489 | } |
12490 | |
12491 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12492 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12493 | /// |
12494 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph) |
12495 | #[inline ] |
12496 | #[target_feature (enable = "avx512fp16" )] |
12497 | #[cfg_attr (test, assert_instr(vcvtqq2ph))] |
12498 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12499 | pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { |
    _mm512_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12501 | } |
12502 | |
12503 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12504 | /// and store the results in dst. |
12505 | /// |
12506 | /// Rounding is done according to the rounding parameter, which can be one of: |
12507 | /// |
12508 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12509 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12510 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12511 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12512 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12513 | /// |
12514 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph) |
12515 | #[inline ] |
12516 | #[target_feature (enable = "avx512fp16" )] |
12517 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12518 | #[rustc_legacy_const_generics (1)] |
12519 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12520 | pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h { |
12521 | unsafe { |
12522 | static_assert_rounding!(ROUNDING); |
12523 | vcvtqq2ph_512(a.as_i64x8(), ROUNDING) |
12524 | } |
12525 | } |
12526 | |
12527 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12528 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12529 | /// mask bit is not set). |
12530 | /// |
12531 | /// Rounding is done according to the rounding parameter, which can be one of: |
12532 | /// |
12533 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12534 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12535 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12536 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12537 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12538 | /// |
12539 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph) |
12540 | #[inline ] |
12541 | #[target_feature (enable = "avx512fp16" )] |
12542 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12543 | #[rustc_legacy_const_generics (3)] |
12544 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12545 | pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>( |
12546 | src: __m128h, |
12547 | k: __mmask8, |
12548 | a: __m512i, |
12549 | ) -> __m128h { |
12550 | unsafe { |
12551 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
12553 | } |
12554 | } |
12555 | |
12556 | /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12557 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12558 | /// |
12559 | /// Rounding is done according to the rounding parameter, which can be one of: |
12560 | /// |
12561 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12562 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12563 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12564 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12565 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12566 | /// |
12567 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph) |
12568 | #[inline ] |
12569 | #[target_feature (enable = "avx512fp16" )] |
12570 | #[cfg_attr (test, assert_instr(vcvtqq2ph, ROUNDING = 8))] |
12571 | #[rustc_legacy_const_generics (2)] |
12572 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12573 | pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h { |
12574 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
12576 | } |
12577 | |
12578 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12579 | /// and store the results in dst. The upper 96 bits of dst are zeroed out. |
12580 | /// |
12581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph) |
12582 | #[inline ] |
12583 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12584 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12585 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12586 | pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h { |
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12588 | } |
12589 | |
12590 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12591 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12592 | /// mask bit is not set). The upper 96 bits of dst are zeroed out. |
12593 | /// |
12594 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph) |
12595 | #[inline ] |
12596 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12597 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12598 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12599 | pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { |
12600 | unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) } |
12601 | } |
12602 | |
12603 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12604 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12605 | /// The upper 96 bits of dst are zeroed out. |
12606 | /// |
12607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph) |
12608 | #[inline ] |
12609 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12610 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12612 | pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { |
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12614 | } |
12615 | |
12616 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12617 | /// and store the results in dst. The upper 64 bits of dst are zeroed out. |
12618 | /// |
12619 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph) |
12620 | #[inline ] |
12621 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12622 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12623 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12624 | pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { |
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12626 | } |
12627 | |
12628 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12629 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12630 | /// mask bit is not set). The upper 64 bits of dst are zeroed out. |
12631 | /// |
12632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph) |
12633 | #[inline ] |
12634 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12635 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12637 | pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { |
12638 | unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) } |
12639 | } |
12640 | |
12641 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12642 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12643 | /// The upper 64 bits of dst are zeroed out. |
12644 | /// |
12645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph) |
12646 | #[inline ] |
12647 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12648 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12650 | pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { |
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12652 | } |
12653 | |
12654 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12655 | /// and store the results in dst. |
12656 | /// |
12657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph) |
12658 | #[inline ] |
12659 | #[target_feature (enable = "avx512fp16" )] |
12660 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12661 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12662 | pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { |
12663 | unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) } |
12664 | } |
12665 | |
12666 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12667 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12668 | /// mask bit is not set). |
12669 | /// |
12670 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph) |
12671 | #[inline ] |
12672 | #[target_feature (enable = "avx512fp16" )] |
12673 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12675 | pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { |
    unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) }
12677 | } |
12678 | |
12679 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12680 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12681 | /// |
12682 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph) |
12683 | #[inline ] |
12684 | #[target_feature (enable = "avx512fp16" )] |
12685 | #[cfg_attr (test, assert_instr(vcvtuqq2ph))] |
12686 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12687 | pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { |
    _mm512_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12689 | } |
12690 | |
12691 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12692 | /// and store the results in dst. |
12693 | /// |
12694 | /// Rounding is done according to the rounding parameter, which can be one of: |
12695 | /// |
12696 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12697 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12698 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12699 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12700 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12701 | /// |
12702 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph) |
12703 | #[inline ] |
12704 | #[target_feature (enable = "avx512fp16" )] |
12705 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12706 | #[rustc_legacy_const_generics (1)] |
12707 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12708 | pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h { |
12709 | unsafe { |
12710 | static_assert_rounding!(ROUNDING); |
12711 | vcvtuqq2ph_512(a.as_u64x8(), ROUNDING) |
12712 | } |
12713 | } |
12714 | |
12715 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12716 | /// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding |
12717 | /// mask bit is not set). |
12718 | /// |
12719 | /// Rounding is done according to the rounding parameter, which can be one of: |
12720 | /// |
12721 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12722 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12723 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12724 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12725 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12726 | /// |
12727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph) |
12728 | #[inline ] |
12729 | #[target_feature (enable = "avx512fp16" )] |
12730 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12731 | #[rustc_legacy_const_generics (3)] |
12732 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12733 | pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>( |
12734 | src: __m128h, |
12735 | k: __mmask8, |
12736 | a: __m512i, |
12737 | ) -> __m128h { |
12738 | unsafe { |
12739 | static_assert_rounding!(ROUNDING); |
        simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src)
12741 | } |
12742 | } |
12743 | |
12744 | /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, |
12745 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
12746 | /// |
12747 | /// Rounding is done according to the rounding parameter, which can be one of: |
12748 | /// |
12749 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12750 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12751 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12752 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12753 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12754 | /// |
12755 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph) |
12756 | #[inline ] |
12757 | #[target_feature (enable = "avx512fp16" )] |
12758 | #[cfg_attr (test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] |
12759 | #[rustc_legacy_const_generics (2)] |
12760 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12761 | pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h { |
12762 | static_assert_rounding!(ROUNDING); |
    _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
12764 | } |
12765 | |
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
12768 | /// |
12769 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph) |
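///
/// A hypothetical sketch of the f32-to-f16 narrowing (illustrative only; assumes nightly
/// `stdarch_x86_avx512_f16` plus AVX512-FP16/AVX512-VL hardware, hence `ignore`).
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn narrow_ps(a: __m128) -> __m128h {
///     // The four f32 lanes are rounded to f16 in lanes 0..4; lanes 4..8 of the result are 0.0.
///     _mm_cvtxps_ph(a)
/// }
/// ```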
12770 | #[inline ] |
12771 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12772 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12773 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12774 | pub fn _mm_cvtxps_ph(a: __m128) -> __m128h { |
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12776 | } |
12777 | |
12778 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12779 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12780 | /// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
12781 | /// |
12782 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph) |
12783 | #[inline ] |
12784 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12785 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12786 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12787 | pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h { |
12788 | unsafe { vcvtps2phx_128(a, src, k) } |
12789 | } |
12790 | |
12791 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12792 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12793 | /// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
12794 | /// |
12795 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph) |
12796 | #[inline ] |
12797 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12798 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12799 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12800 | pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { |
12801 | _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a) |
12802 | } |
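
// Illustrative sketch (not part of the original source): masked f32 -> f16
// conversion with the 128-bit variant. The helper name and lane values are
// invented; a real caller would first check the required features at runtime,
// e.g. with `is_x86_feature_detected!`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_mask_cvtxps_ph() -> __m128h {
    let src = _mm_set1_ph(-1.0);
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    // Mask 0b0101: lanes 0 and 2 receive the converted values 1.0 and 3.0,
    // lanes 1 and 3 keep -1.0 from `src`; lanes 4..7 of dst are zeroed.
    _mm_mask_cvtxps_ph(src, 0b0101, a)
}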
12803 | |
12804 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12805 | /// floating-point elements, and store the results in dst. |
12806 | /// |
12807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph) |
12808 | #[inline ] |
12809 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12810 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12811 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12812 | pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h { |
12813 | _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a) |
12814 | } |
12815 | |
12816 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12817 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12818 | /// when the corresponding mask bit is not set). |
12819 | /// |
12820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph) |
12821 | #[inline ] |
12822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12823 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12825 | pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h { |
12826 | unsafe { vcvtps2phx_256(a, src, k) } |
12827 | } |
12828 | |
12829 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12830 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12831 | /// corresponding mask bit is not set). |
12832 | /// |
12833 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph) |
12834 | #[inline ] |
12835 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
12836 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12837 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12838 | pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { |
12839 | _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a) |
12840 | } |
12841 | |
12842 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12843 | /// floating-point elements, and store the results in dst. |
12844 | /// |
12845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph) |
12846 | #[inline ] |
12847 | #[target_feature (enable = "avx512fp16" )] |
12848 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12849 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12850 | pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h { |
12851 | _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), 0xffff, a) |
12852 | } |
12853 | |
12854 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12855 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12856 | /// when the corresponding mask bit is not set). |
12857 | /// |
12858 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph) |
12859 | #[inline ] |
12860 | #[target_feature (enable = "avx512fp16" )] |
12861 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12862 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12863 | pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h { |
12864 | unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
12865 | } |
12866 | |
12867 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12868 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12869 | /// corresponding mask bit is not set). |
12870 | /// |
12871 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph) |
12872 | #[inline ] |
12873 | #[target_feature (enable = "avx512fp16" )] |
12874 | #[cfg_attr (test, assert_instr(vcvtps2phx))] |
12875 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12876 | pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { |
12877 | _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), k, a) |
12878 | } |
12879 | |
12880 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12881 | /// floating-point elements, and store the results in dst. |
12882 | /// |
12883 | /// Rounding is done according to the rounding parameter, which can be one of: |
12884 | /// |
12885 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12886 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12887 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12888 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12889 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12890 | /// |
12891 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph) |
12892 | #[inline ] |
12893 | #[target_feature (enable = "avx512fp16" )] |
12894 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12895 | #[rustc_legacy_const_generics (1)] |
12896 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12897 | pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h { |
12898 | static_assert_rounding!(ROUNDING); |
12899 | _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), 0xffff, a) |
12900 | } |
12901 | |
12902 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12903 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
12904 | /// when the corresponding mask bit is not set). |
12905 | /// |
12906 | /// Rounding is done according to the rounding parameter, which can be one of: |
12907 | /// |
12908 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12909 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12910 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12911 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12912 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12913 | /// |
12914 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph) |
12915 | #[inline ] |
12916 | #[target_feature (enable = "avx512fp16" )] |
12917 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12918 | #[rustc_legacy_const_generics (3)] |
12919 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12920 | pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>( |
12921 | src: __m256h, |
12922 | k: __mmask16, |
12923 | a: __m512, |
12924 | ) -> __m256h { |
12925 | unsafe { |
12926 | static_assert_rounding!(ROUNDING); |
12927 | vcvtps2phx_512(a, src, k, ROUNDING) |
12928 | } |
12929 | } |
12930 | |
12931 | /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) |
12932 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
12933 | /// corresponding mask bit is not set). |
12934 | /// |
12935 | /// Rounding is done according to the rounding parameter, which can be one of: |
12936 | /// |
12937 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
12938 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
12939 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
12940 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
12941 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
12942 | /// |
12943 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph) |
12944 | #[inline ] |
12945 | #[target_feature (enable = "avx512fp16" )] |
12946 | #[cfg_attr (test, assert_instr(vcvtps2phx, ROUNDING = 8))] |
12947 | #[rustc_legacy_const_generics (2)] |
12948 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12949 | pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h { |
12950 | static_assert_rounding!(ROUNDING); |
12951 | _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), k, a) |
12952 | } |
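
// Illustrative sketch (not part of the original source): the 512-bit ps -> ph
// conversion with an explicit rounding mode. The helper name is invented; the
// rounding/SAE flags must be compile-time constants, hence the const block.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_cvtx_roundps_ph(a: __m512) -> __m256h {
    // Round towards negative infinity and suppress floating-point exceptions.
    _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
}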
12953 | |
12954 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12955 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
12956 | /// elements from a to the upper elements of dst. |
12957 | /// |
12958 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh) |
12959 | #[inline ] |
12960 | #[target_feature (enable = "avx512fp16" )] |
12961 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12962 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12963 | pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { |
12964 | _mm_mask_cvtss_sh(_mm_undefined_ph(), 0xff, a, b) |
12965 | } |
12966 | |
12967 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12968 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
12969 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
12970 | /// upper elements of dst. |
12971 | /// |
12972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh) |
12973 | #[inline ] |
12974 | #[target_feature (enable = "avx512fp16" )] |
12975 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12976 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12977 | pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h { |
12978 | unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
12979 | } |
12980 | |
12981 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12982 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
12983 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
12984 | /// elements of dst. |
12985 | /// |
12986 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh) |
12987 | #[inline ] |
12988 | #[target_feature (enable = "avx512fp16" )] |
12989 | #[cfg_attr (test, assert_instr(vcvtss2sh))] |
12990 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
12991 | pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h { |
12992 | _mm_mask_cvtss_sh(_mm_setzero_ph(), k, a, b) |
12993 | } |
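
// Illustrative sketch (not part of the original source): converting the low f32
// lane of `b` into the low f16 lane while keeping the upper seven f16 lanes of
// `a`. Names and values are made up for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_cvtss_sh() -> __m128h {
    let a = _mm_set1_ph(2.0);
    let b = _mm_set_ss(0.5);
    // dst = [0.5, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] as f16 lanes
    _mm_cvtss_sh(a, b)
}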
12994 | |
12995 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
12996 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
12997 | /// elements from a to the upper elements of dst. |
12998 | /// |
12999 | /// Rounding is done according to the rounding parameter, which can be one of: |
13000 | /// |
13001 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13002 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13003 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13004 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13005 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13006 | /// |
13007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh) |
13008 | #[inline ] |
13009 | #[target_feature (enable = "avx512fp16" )] |
13010 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13011 | #[rustc_legacy_const_generics (2)] |
13012 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13013 | pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h { |
13014 | static_assert_rounding!(ROUNDING); |
13015 | _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
13016 | } |
13017 | |
13018 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
13019 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
13020 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
13021 | /// upper elements of dst. |
13022 | /// |
13023 | /// Rounding is done according to the rounding parameter, which can be one of: |
13024 | /// |
13025 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13026 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13027 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13028 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13029 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13030 | /// |
13031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh) |
13032 | #[inline ] |
13033 | #[target_feature (enable = "avx512fp16" )] |
13034 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13035 | #[rustc_legacy_const_generics (4)] |
13036 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13037 | pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>( |
13038 | src: __m128h, |
13039 | k: __mmask8, |
13040 | a: __m128h, |
13041 | b: __m128, |
13042 | ) -> __m128h { |
13043 | unsafe { |
13044 | static_assert_rounding!(ROUNDING); |
13045 | vcvtss2sh(a, b, src, k, ROUNDING) |
13046 | } |
13047 | } |
13048 | |
13049 | /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) |
13050 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
13051 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13052 | /// elements of dst. |
13053 | /// |
13054 | /// Rounding is done according to the rounding parameter, which can be one of: |
13055 | /// |
13056 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13057 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13058 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13059 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13060 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13061 | /// |
13062 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh) |
13063 | #[inline ] |
13064 | #[target_feature (enable = "avx512fp16" )] |
13065 | #[cfg_attr (test, assert_instr(vcvtss2sh, ROUNDING = 8))] |
13066 | #[rustc_legacy_const_generics (3)] |
13067 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13068 | pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>( |
13069 | k: __mmask8, |
13070 | a: __m128h, |
13071 | b: __m128, |
13072 | ) -> __m128h { |
13073 | static_assert_rounding!(ROUNDING); |
13074 | _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
13075 | } |
13076 | |
13077 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13078 | /// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out. |
13079 | /// |
13080 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph) |
13081 | #[inline ] |
13082 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13083 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13084 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13085 | pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h { |
13086 | _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) |
13087 | } |
13088 | |
13089 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13090 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13091 | /// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. |
13092 | /// |
13093 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph) |
13094 | #[inline ] |
13095 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13096 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13097 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13098 | pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h { |
13099 | unsafe { vcvtpd2ph_128(a, src, k) } |
13100 | } |
13101 | |
13102 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13103 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13104 | /// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out. |
13105 | /// |
13106 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph) |
13107 | #[inline ] |
13108 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13109 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13110 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13111 | pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { |
13112 | _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a) |
13113 | } |
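
// Illustrative sketch (not part of the original source): the 128-bit pd -> ph
// conversion only produces two f16 lanes, so most of dst is zero. The helper
// name and input values are invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_cvtpd_ph() -> __m128h {
    let a = _mm_setr_pd(1.5, -2.25);
    // dst = [1.5, -2.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] as f16 lanes
    _mm_cvtpd_ph(a)
}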
13114 | |
13115 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13116 | /// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out. |
13117 | /// |
13118 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph) |
13119 | #[inline ] |
13120 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13121 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13122 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13123 | pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { |
13124 | _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) |
13125 | } |
13126 | |
13127 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13128 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13129 | /// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
13130 | /// |
13131 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph) |
13132 | #[inline ] |
13133 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13134 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13135 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13136 | pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h { |
13137 | unsafe { vcvtpd2ph_256(a, src, k) } |
13138 | } |
13139 | |
13140 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13141 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13142 | /// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out. |
13143 | /// |
13144 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph) |
13145 | #[inline ] |
13146 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13147 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13148 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13149 | pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { |
13150 | _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a) |
13151 | } |
13152 | |
13153 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13154 | /// floating-point elements, and store the results in dst. |
13155 | /// |
13156 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph) |
13157 | #[inline ] |
13158 | #[target_feature (enable = "avx512fp16" )] |
13159 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13160 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13161 | pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { |
13162 | _mm512_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) |
13163 | } |
13164 | |
13165 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13166 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13167 | /// when the corresponding mask bit is not set). |
13168 | /// |
13169 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph) |
13170 | #[inline ] |
13171 | #[target_feature (enable = "avx512fp16" )] |
13172 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13173 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13174 | pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h { |
13175 | unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
13176 | } |
13177 | |
13178 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13179 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13180 | /// corresponding mask bit is not set). |
13181 | /// |
13182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph) |
13183 | #[inline ] |
13184 | #[target_feature (enable = "avx512fp16" )] |
13185 | #[cfg_attr (test, assert_instr(vcvtpd2ph))] |
13186 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13187 | pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h { |
13188 | _mm512_mask_cvtpd_ph(_mm_setzero_ph(), k, a) |
13189 | } |
13190 | |
13191 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13192 | /// floating-point elements, and store the results in dst. |
13193 | /// |
13194 | /// Rounding is done according to the rounding parameter, which can be one of: |
13195 | /// |
13196 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13197 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13198 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13199 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13200 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13201 | /// |
13202 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph) |
13203 | #[inline ] |
13204 | #[target_feature (enable = "avx512fp16" )] |
13205 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13206 | #[rustc_legacy_const_generics (1)] |
13207 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13208 | pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h { |
13209 | static_assert_rounding!(ROUNDING); |
13210 | _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), 0xff, a) |
13211 | } |
13212 | |
13213 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13214 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst |
13215 | /// when the corresponding mask bit is not set). |
13216 | /// |
13217 | /// Rounding is done according to the rounding parameter, which can be one of: |
13218 | /// |
13219 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13220 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13221 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13222 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13223 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13224 | /// |
13225 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph) |
13226 | #[inline ] |
13227 | #[target_feature (enable = "avx512fp16" )] |
13228 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13229 | #[rustc_legacy_const_generics (3)] |
13230 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13231 | pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>( |
13232 | src: __m128h, |
13233 | k: __mmask8, |
13234 | a: __m512d, |
13235 | ) -> __m128h { |
13236 | unsafe { |
13237 | static_assert_rounding!(ROUNDING); |
13238 | vcvtpd2ph_512(a, src, k, ROUNDING) |
13239 | } |
13240 | } |
13241 | |
13242 | /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) |
13243 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
13244 | /// corresponding mask bit is not set). |
13245 | /// |
13246 | /// Rounding is done according to the rounding parameter, which can be one of: |
13247 | /// |
13248 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13249 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13250 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13251 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13252 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13253 | /// |
13254 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph) |
13255 | #[inline ] |
13256 | #[target_feature (enable = "avx512fp16" )] |
13257 | #[cfg_attr (test, assert_instr(vcvtpd2ph, ROUNDING = 8))] |
13258 | #[rustc_legacy_const_generics (2)] |
13259 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13260 | pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h { |
13261 | static_assert_rounding!(ROUNDING); |
13262 | _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), k, a) |
13263 | } |
13264 | |
13265 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13266 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
13267 | /// elements from a to the upper elements of dst. |
13268 | /// |
13269 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh) |
13270 | #[inline ] |
13271 | #[target_feature (enable = "avx512fp16" )] |
13272 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13273 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13274 | pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { |
13275 | _mm_mask_cvtsd_sh(_mm_undefined_ph(), 0xff, a, b) |
13276 | } |
13277 | |
13278 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13279 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
13280 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
13281 | /// upper elements of dst. |
13282 | /// |
13283 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh) |
13284 | #[inline ] |
13285 | #[target_feature (enable = "avx512fp16" )] |
13286 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13287 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13288 | pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h { |
13289 | unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
13290 | } |
13291 | |
13292 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13293 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
13294 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13295 | /// elements of dst. |
13296 | /// |
13297 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh) |
13298 | #[inline ] |
13299 | #[target_feature (enable = "avx512fp16" )] |
13300 | #[cfg_attr (test, assert_instr(vcvtsd2sh))] |
13301 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13302 | pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h { |
13303 | _mm_mask_cvtsd_sh(_mm_setzero_ph(), k, a, b) |
13304 | } |
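
// Illustrative sketch (not part of the original source): zero-masked scalar
// f64 -> f16 conversion. With mask bit 0 clear the low lane is forced to zero
// and only the upper lanes of `a` survive. Names and values are invented.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_maskz_cvtsd_sh() -> __m128h {
    let a = _mm_set1_ph(4.0);
    let b = _mm_set_sd(1.25);
    // Mask 0b0: lane 0 of dst becomes 0.0 instead of 1.25; lanes 1..7 stay 4.0.
    _mm_maskz_cvtsd_sh(0b0, a, b)
}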
13305 | |
13306 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13307 | /// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed |
13308 | /// elements from a to the upper elements of dst. |
13309 | /// |
13310 | /// Rounding is done according to the rounding parameter, which can be one of: |
13311 | /// |
13312 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13313 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13314 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13315 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13316 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13317 | /// |
13318 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh) |
13319 | #[inline ] |
13320 | #[target_feature (enable = "avx512fp16" )] |
13321 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13322 | #[rustc_legacy_const_generics (2)] |
13323 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13324 | pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h { |
13325 | static_assert_rounding!(ROUNDING); |
13326 | _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b) |
13327 | } |
13328 | |
13329 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13330 | /// floating-point element, store the result in the lower element of dst using writemask k (the element |
13331 | /// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the |
13332 | /// upper elements of dst. |
13333 | /// |
13334 | /// Rounding is done according to the rounding parameter, which can be one of: |
13335 | /// |
13336 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13337 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13338 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13339 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13340 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13341 | /// |
13342 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh) |
13343 | #[inline ] |
13344 | #[target_feature (enable = "avx512fp16" )] |
13345 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13346 | #[rustc_legacy_const_generics (4)] |
13347 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13348 | pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>( |
13349 | src: __m128h, |
13350 | k: __mmask8, |
13351 | a: __m128h, |
13352 | b: __m128d, |
13353 | ) -> __m128h { |
13354 | unsafe { |
13355 | static_assert_rounding!(ROUNDING); |
13356 | vcvtsd2sh(a, b, src, k, ROUNDING) |
13357 | } |
13358 | } |
13359 | |
13360 | /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) |
13361 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element |
13362 | /// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper |
13363 | /// elements of dst. |
13364 | /// |
13365 | /// Rounding is done according to the rounding parameter, which can be one of: |
13366 | /// |
13367 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13368 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13369 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13370 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13371 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13372 | /// |
13373 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh) |
13374 | #[inline ] |
13375 | #[target_feature (enable = "avx512fp16" )] |
13376 | #[cfg_attr (test, assert_instr(vcvtsd2sh, ROUNDING = 8))] |
13377 | #[rustc_legacy_const_generics (3)] |
13378 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13379 | pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>( |
13380 | k: __mmask8, |
13381 | a: __m128h, |
13382 | b: __m128d, |
13383 | ) -> __m128h { |
13384 | static_assert_rounding!(ROUNDING); |
13385 | _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b) |
13386 | } |
13387 | |
13388 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13389 | /// store the results in dst. |
13390 | /// |
13391 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16) |
13392 | #[inline ] |
13393 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13394 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13395 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13396 | pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i { |
13397 | _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a) |
13398 | } |
13399 | |
13400 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13401 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13402 | /// mask bit is not set). |
13403 | /// |
13404 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16) |
13405 | #[inline ] |
13406 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13407 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13408 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13409 | pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13410 | unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) } |
13411 | } |
13412 | |
13413 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13414 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13415 | /// |
13416 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16) |
13417 | #[inline ] |
13418 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13419 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13420 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13421 | pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { |
13422 | _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a) |
13423 | } |
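
// Illustrative sketch (not part of the original source): f16 -> i16 conversion
// honouring the current MXCSR rounding mode (round-to-nearest-even by default).
// The helper name and input values are invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_cvtph_epi16() -> __m128i {
    let a = _mm_set1_ph(2.5);
    // Under round-to-nearest-even, 2.5 rounds to 2 in each of the eight
    // resulting 16-bit integer lanes.
    _mm_cvtph_epi16(a)
}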
13424 | |
13425 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13426 | /// store the results in dst. |
13427 | /// |
13428 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16) |
13429 | #[inline ] |
13430 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13431 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13432 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13433 | pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { |
13434 | _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a) |
13435 | } |
13436 | |
13437 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13438 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13439 | /// mask bit is not set). |
13440 | /// |
13441 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16) |
13442 | #[inline ] |
13443 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13444 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13445 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13446 | pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
13447 | unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) } |
13448 | } |
13449 | |
13450 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13451 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13452 | /// |
13453 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16) |
13454 | #[inline ] |
13455 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13456 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13457 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13458 | pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { |
13459 | _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a) |
13460 | } |
13461 | |
13462 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13463 | /// store the results in dst. |
13464 | /// |
13465 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16) |
13466 | #[inline ] |
13467 | #[target_feature (enable = "avx512fp16" )] |
13468 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13469 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13470 | pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { |
13471 | _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) |
13472 | } |
13473 | |
13474 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13475 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13476 | /// mask bit is not set). |
13477 | /// |
13478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16) |
13479 | #[inline ] |
13480 | #[target_feature (enable = "avx512fp16" )] |
13481 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13482 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13483 | pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13484 | unsafe { |
13485 | transmute(vcvtph2w_512( |
13486 | a, |
13487 | src.as_i16x32(), |
13488 | k, |
13489 | _MM_FROUND_CUR_DIRECTION, |
13490 | )) |
13491 | } |
13492 | } |
13493 | |
13494 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13495 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13496 | /// |
13497 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16) |
13498 | #[inline ] |
13499 | #[target_feature (enable = "avx512fp16" )] |
13500 | #[cfg_attr (test, assert_instr(vcvtph2w))] |
13501 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13502 | pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { |
13503 | _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a) |
13504 | } |
13505 | |
13506 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13507 | /// store the results in dst. |
13508 | /// |
13509 | /// Rounding is done according to the rounding parameter, which can be one of: |
13510 | /// |
13511 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13512 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13513 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13514 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13515 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13516 | /// |
13517 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16) |
13518 | #[inline ] |
13519 | #[target_feature (enable = "avx512fp16" )] |
13520 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13521 | #[rustc_legacy_const_generics (1)] |
13522 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13523 | pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i { |
13524 | static_assert_rounding!(ROUNDING); |
13525 | _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a) |
13526 | } |
13527 | |
13528 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13529 | /// store the results in dst using writemask k (elements are copied from src when the corresponding |
13530 | /// mask bit is not set). |
13531 | /// |
13532 | /// Rounding is done according to the rounding parameter, which can be one of: |
13533 | /// |
13534 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13535 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13536 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13537 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13538 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13539 | /// |
13540 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16) |
13541 | #[inline ] |
13542 | #[target_feature (enable = "avx512fp16" )] |
13543 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13544 | #[rustc_legacy_const_generics (3)] |
13545 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13546 | pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>( |
13547 | src: __m512i, |
13548 | k: __mmask32, |
13549 | a: __m512h, |
13550 | ) -> __m512i { |
13551 | unsafe { |
13552 | static_assert_rounding!(ROUNDING); |
13553 | transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING)) |
13554 | } |
13555 | } |
13556 | |
13557 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and |
13558 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13559 | /// |
13560 | /// Rounding is done according to the rounding parameter, which can be one of: |
13561 | /// |
13562 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13563 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13564 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13565 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13566 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13567 | /// |
13568 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16) |
13569 | #[inline ] |
13570 | #[target_feature (enable = "avx512fp16" )] |
13571 | #[cfg_attr (test, assert_instr(vcvtph2w, ROUNDING = 8))] |
13572 | #[rustc_legacy_const_generics (2)] |
13573 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13574 | pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13575 | static_assert_rounding!(ROUNDING); |
13576 | _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a) |
13577 | } |
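
// Illustrative sketch (not part of the original source): 512-bit f16 -> i16
// conversion with an explicit rounding mode instead of MXCSR. The helper name
// is invented; the input is assumed to hold values representable as i16.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn _example_cvt_roundph_epi16(a: __m512h) -> __m512i {
    // Round each of the 32 half-precision lanes up (towards +inf) and suppress
    // exceptions while converting to signed 16-bit integers.
    _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
}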
13578 | |
13579 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13580 | /// and store the results in dst. |
13581 | /// |
13582 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16) |
13583 | #[inline ] |
13584 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13585 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13586 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13587 | pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i { |
13588 | _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a) |
13589 | } |
13590 | |
13591 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13592 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13593 | /// mask bit is not set). |
13594 | /// |
13595 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16) |
13596 | #[inline ] |
13597 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13598 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13599 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13600 | pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13601 | unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) } |
13602 | } |
13603 | |
13604 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13605 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13606 | /// |
13607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16) |
13608 | #[inline ] |
13609 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13610 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13611 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13612 | pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { |
13613 | _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a) |
13614 | } |
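
// Illustrative sketch (not part of the original source): unsigned f16 -> u16
// conversion with a zeromask. Mask bits select which lanes are converted; the
// rest become zero. Names and values are invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn _example_maskz_cvtph_epu16() -> __m128i {
    let a = _mm_set1_ph(7.0);
    // Only the low four lanes are converted to 7u16; the high four are zeroed.
    _mm_maskz_cvtph_epu16(0b0000_1111, a)
}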
13615 | |
13616 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13617 | /// and store the results in dst. |
13618 | /// |
13619 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16) |
13620 | #[inline ] |
13621 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13622 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13623 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13624 | pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i { |
13625 | _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a) |
13626 | } |
13627 | |
13628 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13629 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13630 | /// mask bit is not set). |
13631 | /// |
13632 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16) |
13633 | #[inline ] |
13634 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13635 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13637 | pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
13638 | unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) } |
13639 | } |
13640 | |
13641 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13642 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13643 | /// |
13644 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16) |
13645 | #[inline ] |
13646 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13647 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13648 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13649 | pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { |
13650 | _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a) |
13651 | } |
13652 | |
13653 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13654 | /// and store the results in dst. |
13655 | /// |
13656 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16) |
13657 | #[inline ] |
13658 | #[target_feature (enable = "avx512fp16" )] |
13659 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13660 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13661 | pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { |
13662 | _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) |
13663 | } |
13664 | |
13665 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13666 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13667 | /// mask bit is not set). |
13668 | /// |
13669 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16) |
13670 | #[inline ] |
13671 | #[target_feature (enable = "avx512fp16" )] |
13672 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13673 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13674 | pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13675 | unsafe { |
13676 | transmute(vcvtph2uw_512( |
13677 | a, |
13678 | src.as_u16x32(), |
13679 | k, |
13680 | _MM_FROUND_CUR_DIRECTION, |
13681 | )) |
13682 | } |
13683 | } |
13684 | |
13685 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13686 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13687 | /// |
13688 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16) |
13689 | #[inline ] |
13690 | #[target_feature (enable = "avx512fp16" )] |
13691 | #[cfg_attr (test, assert_instr(vcvtph2uw))] |
13692 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13693 | pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { |
13694 | _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a) |
13695 | } |
13696 | |
13697 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13698 | /// and store the results in dst. |
13699 | /// |
13700 | /// Rounding is done according to the rounding parameter, which can be one of: |
13701 | /// |
13702 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13703 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13704 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13705 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13706 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13707 | /// |
13708 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16) |
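///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm512_set1_ph(2.5);
///     // Round up instead of the default round-to-nearest-even.
///     let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///     let lanes: [u16; 32] = unsafe { core::mem::transmute(r) };
///     assert!(lanes.iter().all(|&x| x == 3)); // 2.5 rounded toward +inf
/// }
/// ```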
13709 | #[inline ] |
13710 | #[target_feature (enable = "avx512fp16" )] |
13711 | #[cfg_attr (test, assert_instr(vcvtph2uw, ROUNDING = 8))] |
13712 | #[rustc_legacy_const_generics (1)] |
13713 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13714 | pub fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i { |
13715 | static_assert_rounding!(ROUNDING); |
13716 | _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a) |
13717 | } |
13718 | |
13719 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13720 | /// and store the results in dst using writemask k (elements are copied from src when the corresponding |
13721 | /// mask bit is not set). |
13722 | /// |
13723 | /// Rounding is done according to the rounding parameter, which can be one of: |
13724 | /// |
13725 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13726 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13727 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13728 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13729 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13730 | /// |
13731 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16) |
13732 | #[inline ] |
13733 | #[target_feature (enable = "avx512fp16" )] |
13734 | #[cfg_attr (test, assert_instr(vcvtph2uw, ROUNDING = 8))] |
13735 | #[rustc_legacy_const_generics (3)] |
13736 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13737 | pub fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>( |
13738 | src: __m512i, |
13739 | k: __mmask32, |
13740 | a: __m512h, |
13741 | ) -> __m512i { |
13742 | unsafe { |
13743 | static_assert_rounding!(ROUNDING); |
13744 | transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING)) |
13745 | } |
13746 | } |
13747 | |
13748 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, |
13749 | /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
13750 | /// |
13751 | /// Rounding is done according to the rounding parameter, which can be one of: |
13752 | /// |
13753 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
13754 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
13755 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
13756 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
13757 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
13758 | /// |
13759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16) |
13760 | #[inline ] |
13761 | #[target_feature (enable = "avx512fp16" )] |
13762 | #[cfg_attr (test, assert_instr(vcvtph2uw, ROUNDING = 8))] |
13763 | #[rustc_legacy_const_generics (2)] |
13764 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13765 | pub fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13766 | static_assert_rounding!(ROUNDING); |
13767 | _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a) |
13768 | } |
13769 | |
13770 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13771 | /// truncation, and store the results in dst. |
13772 | /// |
13773 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16) |
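///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and a CPU with AVX512-FP16 and
/// AVX512VL (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     // Arguments run from the highest lane (e7) down to lane 0.
///     let a = _mm_set_ph(-2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 7.5);
///     // Truncation always rounds toward zero, regardless of MXCSR.RC.
///     let r = _mm_cvttph_epi16(a);
///     let lanes: [i16; 8] = unsafe { core::mem::transmute(r) };
///     assert_eq!(lanes, [7, 3, 2, 1, 0, 0, -1, -2]);
/// }
/// ```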
13774 | #[inline ] |
13775 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13776 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13777 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13778 | pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i { |
13779 | _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a) |
13780 | } |
13781 | |
13782 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13783 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13784 | /// mask bit is not set). |
13785 | /// |
13786 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16) |
13787 | #[inline ] |
13788 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13789 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13790 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13791 | pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13792 | unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) } |
13793 | } |
13794 | |
13795 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13796 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13797 | /// mask bit is not set). |
13798 | /// |
13799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16) |
13800 | #[inline ] |
13801 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13802 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13803 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13804 | pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { |
13805 | _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a) |
13806 | } |
13807 | |
13808 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13809 | /// truncation, and store the results in dst. |
13810 | /// |
13811 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16) |
13812 | #[inline ] |
13813 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13814 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13815 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13816 | pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { |
13817 | _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a) |
13818 | } |
13819 | |
13820 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13821 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13822 | /// mask bit is not set). |
13823 | /// |
13824 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16) |
13825 | #[inline ] |
13826 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13827 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13828 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13829 | pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
13830 | unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) } |
13831 | } |
13832 | |
13833 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13834 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13835 | /// mask bit is not set). |
13836 | /// |
13837 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16) |
13838 | #[inline ] |
13839 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13840 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13841 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13842 | pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { |
13843 | _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a) |
13844 | } |
13845 | |
13846 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13847 | /// truncation, and store the results in dst. |
13848 | /// |
13849 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16) |
13850 | #[inline ] |
13851 | #[target_feature (enable = "avx512fp16" )] |
13852 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13853 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13854 | pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { |
13855 | _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) |
13856 | } |
13857 | |
13858 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13859 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13860 | /// mask bit is not set). |
13861 | /// |
13862 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16) |
13863 | #[inline ] |
13864 | #[target_feature (enable = "avx512fp16" )] |
13865 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13866 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13867 | pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
13868 | unsafe { |
13869 | transmute(vcvttph2w_512( |
13870 | a, |
13871 | src.as_i16x32(), |
13872 | k, |
13873 | _MM_FROUND_CUR_DIRECTION, |
13874 | )) |
13875 | } |
13876 | } |
13877 | |
13878 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13879 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13880 | /// mask bit is not set). |
13881 | /// |
13882 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16) |
13883 | #[inline ] |
13884 | #[target_feature (enable = "avx512fp16" )] |
13885 | #[cfg_attr (test, assert_instr(vcvttph2w))] |
13886 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13887 | pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { |
13888 | _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a) |
13889 | } |
13890 | |
13891 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13892 | /// truncation, and store the results in dst. |
13893 | /// |
13894 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
13895 | /// |
13896 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16) |
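///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm512_set1_ph(-1.75);
///     // Truncate toward zero while suppressing floating-point exceptions.
///     let r = _mm512_cvtt_roundph_epi16::<{ _MM_FROUND_NO_EXC }>(a);
///     let lanes: [i16; 32] = unsafe { core::mem::transmute(r) };
///     assert!(lanes.iter().all(|&x| x == -1));
/// }
/// ```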
13897 | #[inline ] |
13898 | #[target_feature (enable = "avx512fp16" )] |
13899 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13900 | #[rustc_legacy_const_generics (1)] |
13901 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13902 | pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i { |
13903 | static_assert_sae!(SAE); |
13904 | _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a) |
13905 | } |
13906 | |
13907 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13908 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13909 | /// mask bit is not set). |
13910 | /// |
13911 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
13912 | /// |
13913 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16) |
13914 | #[inline ] |
13915 | #[target_feature (enable = "avx512fp16" )] |
13916 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13917 | #[rustc_legacy_const_generics (3)] |
13918 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13919 | pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>( |
13920 | src: __m512i, |
13921 | k: __mmask32, |
13922 | a: __m512h, |
13923 | ) -> __m512i { |
13924 | unsafe { |
13925 | static_assert_sae!(SAE); |
13926 | transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE)) |
13927 | } |
13928 | } |
13929 | |
13930 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with |
13931 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13932 | /// mask bit is not set). |
13933 | /// |
13934 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
13935 | /// |
13936 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16) |
13937 | #[inline ] |
13938 | #[target_feature (enable = "avx512fp16" )] |
13939 | #[cfg_attr (test, assert_instr(vcvttph2w, SAE = 8))] |
13940 | #[rustc_legacy_const_generics (2)] |
13941 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13942 | pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
13943 | static_assert_sae!(SAE); |
13944 | _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a) |
13945 | } |
13946 | |
13947 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13948 | /// truncation, and store the results in dst. |
13949 | /// |
13950 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16) |
13951 | #[inline ] |
13952 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13953 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13954 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13955 | pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i { |
13956 | _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a) |
13957 | } |
13958 | |
13959 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13960 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13961 | /// mask bit is not set). |
13962 | /// |
13963 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16) |
13964 | #[inline ] |
13965 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13966 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13967 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13968 | pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
13969 | unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) } |
13970 | } |
13971 | |
13972 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13973 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
13974 | /// mask bit is not set). |
13975 | /// |
13976 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16) |
13977 | #[inline ] |
13978 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13979 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13980 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13981 | pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i { |
13982 | _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a) |
13983 | } |
13984 | |
13985 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13986 | /// truncation, and store the results in dst. |
13987 | /// |
13988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16) |
13989 | #[inline ] |
13990 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
13991 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
13992 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
13993 | pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i { |
13994 | _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a) |
13995 | } |
13996 | |
13997 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
13998 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
13999 | /// mask bit is not set). |
14000 | /// |
14001 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16) |
14002 | #[inline ] |
14003 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14004 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14005 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14006 | pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { |
14007 | unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) } |
14008 | } |
14009 | |
14010 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14011 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14012 | /// mask bit is not set). |
14013 | /// |
14014 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16) |
14015 | #[inline ] |
14016 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14017 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14018 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14019 | pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i { |
14020 | _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a) |
14021 | } |
14022 | |
14023 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14024 | /// truncation, and store the results in dst. |
14025 | /// |
14026 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16) |
14027 | #[inline ] |
14028 | #[target_feature (enable = "avx512fp16" )] |
14029 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14030 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14031 | pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i { |
14032 | _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) |
14033 | } |
14034 | |
14035 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14036 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
14037 | /// mask bit is not set). |
14038 | /// |
14039 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16) |
14040 | #[inline ] |
14041 | #[target_feature (enable = "avx512fp16" )] |
14042 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14043 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14044 | pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { |
14045 | unsafe { |
14046 | transmute(vcvttph2uw_512( |
14047 | a, |
14048 | src.as_u16x32(), |
14049 | k, |
14050 | _MM_FROUND_CUR_DIRECTION, |
14051 | )) |
14052 | } |
14053 | } |
14054 | |
14055 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14056 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14057 | /// mask bit is not set). |
14058 | /// |
14059 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16) |
14060 | #[inline ] |
14061 | #[target_feature (enable = "avx512fp16" )] |
14062 | #[cfg_attr (test, assert_instr(vcvttph2uw))] |
14063 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14064 | pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i { |
14065 | _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a) |
14066 | } |
14067 | |
14068 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14069 | /// truncation, and store the results in dst. |
14070 | /// |
14071 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14072 | /// |
14073 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16) |
14074 | #[inline ] |
14075 | #[target_feature (enable = "avx512fp16" )] |
14076 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14077 | #[rustc_legacy_const_generics (1)] |
14078 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14079 | pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i { |
14080 | static_assert_sae!(SAE); |
14081 | _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a) |
14082 | } |
14083 | |
14084 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14085 | /// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding |
14086 | /// mask bit is not set). |
14087 | /// |
14088 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14089 | /// |
14090 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16) |
14091 | #[inline ] |
14092 | #[target_feature (enable = "avx512fp16" )] |
14093 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14094 | #[rustc_legacy_const_generics (3)] |
14095 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14096 | pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>( |
14097 | src: __m512i, |
14098 | k: __mmask32, |
14099 | a: __m512h, |
14100 | ) -> __m512i { |
14101 | unsafe { |
14102 | static_assert_sae!(SAE); |
14103 | transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE)) |
14104 | } |
14105 | } |
14106 | |
14107 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with |
14108 | /// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding |
14109 | /// mask bit is not set). |
14110 | /// |
14111 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14112 | /// |
14113 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16) |
14114 | #[inline ] |
14115 | #[target_feature (enable = "avx512fp16" )] |
14116 | #[cfg_attr (test, assert_instr(vcvttph2uw, SAE = 8))] |
14117 | #[rustc_legacy_const_generics (2)] |
14118 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14119 | pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { |
14120 | static_assert_sae!(SAE); |
14121 | _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a) |
14122 | } |
14123 | |
14124 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14125 | /// results in dst. |
14126 | /// |
14127 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32) |
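///
/// Because the result holds only four 32-bit lanes, just the lower four half-precision elements
/// of a take part in the conversion. A minimal sketch (not from Intel's documentation), assuming
/// nightly Rust, the unstable `stdarch_x86_avx512_f16` and `f16` features, and a CPU with
/// AVX512-FP16 and AVX512VL (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.5, 3.5, 2.5, 1.5);
///     let r = _mm_cvtph_epi32(a);
///     let lanes: [i32; 4] = unsafe { core::mem::transmute(r) };
///     // Round-to-nearest-even: 1.5 -> 2, 2.5 -> 2, 3.5 -> 4, 4.5 -> 4.
///     assert_eq!(lanes, [2, 2, 4, 4]);
/// }
/// ```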
14128 | #[inline ] |
14129 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14130 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14131 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14132 | pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i { |
14133 | _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a) |
14134 | } |
14135 | |
14136 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14137 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14138 | /// |
14139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32) |
14140 | #[inline ] |
14141 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14142 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14143 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14144 | pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14145 | unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) } |
14146 | } |
14147 | |
14148 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14149 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14150 | /// |
14151 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32) |
14152 | #[inline ] |
14153 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14154 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14155 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14156 | pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i { |
14157 | _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a) |
14158 | } |
14159 | |
14160 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14161 | /// results in dst. |
14162 | /// |
14163 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32) |
14164 | #[inline ] |
14165 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14166 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14167 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14168 | pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i { |
14169 | _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a) |
14170 | } |
14171 | |
14172 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14173 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14174 | /// |
14175 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32) |
14176 | #[inline ] |
14177 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14178 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14179 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14180 | pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14181 | unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) } |
14182 | } |
14183 | |
14184 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14185 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14186 | /// |
14187 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32) |
14188 | #[inline ] |
14189 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14190 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14191 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14192 | pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i { |
14193 | _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a) |
14194 | } |
14195 | |
14196 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14197 | /// results in dst. |
14198 | /// |
14199 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32) |
14200 | #[inline ] |
14201 | #[target_feature (enable = "avx512fp16" )] |
14202 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14203 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14204 | pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i { |
14205 | _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a) |
14206 | } |
14207 | |
14208 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14209 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14210 | /// |
14211 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32) |
14212 | #[inline ] |
14213 | #[target_feature (enable = "avx512fp16" )] |
14214 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14215 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14216 | pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14217 | unsafe { |
14218 | transmute(vcvtph2dq_512( |
14219 | a, |
14220 | src.as_i32x16(), |
14221 | k, |
14222 | _MM_FROUND_CUR_DIRECTION, |
14223 | )) |
14224 | } |
14225 | } |
14226 | |
14227 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14228 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14229 | /// |
14230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32) |
14231 | #[inline ] |
14232 | #[target_feature (enable = "avx512fp16" )] |
14233 | #[cfg_attr (test, assert_instr(vcvtph2dq))] |
14234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14235 | pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i { |
14236 | _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a) |
14237 | } |
14238 | |
14239 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14240 | /// results in dst. |
14241 | /// |
14242 | /// Rounding is done according to the rounding parameter, which can be one of: |
14243 | /// |
14244 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14245 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14246 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14247 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14248 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14249 | /// |
14250 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32) |
14251 | #[inline ] |
14252 | #[target_feature (enable = "avx512fp16" )] |
14253 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14254 | #[rustc_legacy_const_generics (1)] |
14255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14256 | pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i { |
14257 | static_assert_rounding!(ROUNDING); |
14258 | _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a) |
14259 | } |
14260 | |
14261 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14262 | /// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14263 | /// |
14264 | /// Rounding is done according to the rounding parameter, which can be one of: |
14265 | /// |
14266 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14267 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14268 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14269 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14270 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14271 | /// |
14272 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32) |
14273 | #[inline ] |
14274 | #[target_feature (enable = "avx512fp16" )] |
14275 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14276 | #[rustc_legacy_const_generics (3)] |
14277 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14278 | pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>( |
14279 | src: __m512i, |
14280 | k: __mmask16, |
14281 | a: __m256h, |
14282 | ) -> __m512i { |
14283 | unsafe { |
14284 | static_assert_rounding!(ROUNDING); |
14285 | transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING)) |
14286 | } |
14287 | } |
14288 | |
14289 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the |
14290 | /// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14291 | /// |
14292 | /// Rounding is done according to the rounding parameter, which can be one of: |
14293 | /// |
14294 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14295 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14296 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14297 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14298 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14299 | /// |
14300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32) |
14301 | #[inline ] |
14302 | #[target_feature (enable = "avx512fp16" )] |
14303 | #[cfg_attr (test, assert_instr(vcvtph2dq, ROUNDING = 8))] |
14304 | #[rustc_legacy_const_generics (2)] |
14305 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14306 | pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14307 | static_assert_rounding!(ROUNDING); |
14308 | _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a) |
14309 | } |
14310 | |
14311 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store |
14312 | /// the result in dst. |
14313 | /// |
14314 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32) |
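///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(6.5);
///     // Uses the current MXCSR rounding mode; the default rounds to nearest even.
///     assert_eq!(_mm_cvtsh_i32(a), 6);
/// }
/// ```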
14315 | #[inline ] |
14316 | #[target_feature (enable = "avx512fp16" )] |
14317 | #[cfg_attr (test, assert_instr(vcvtsh2si))] |
14318 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14319 | pub fn _mm_cvtsh_i32(a: __m128h) -> i32 { |
14320 | unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) } |
14321 | } |
14322 | |
14323 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store |
14324 | /// the result in dst. |
14325 | /// |
14326 | /// Rounding is done according to the rounding parameter, which can be one of: |
14327 | /// |
14328 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14329 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14330 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14331 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14332 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14333 | /// |
14334 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32) |
14335 | #[inline ] |
14336 | #[target_feature (enable = "avx512fp16" )] |
14337 | #[cfg_attr (test, assert_instr(vcvtsh2si, ROUNDING = 8))] |
14338 | #[rustc_legacy_const_generics (1)] |
14339 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14340 | pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 { |
14341 | unsafe { |
14342 | static_assert_rounding!(ROUNDING); |
14343 | vcvtsh2si32(a, ROUNDING) |
14344 | } |
14345 | } |
14346 | |
14347 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store the |
14348 | /// results in dst. |
14349 | /// |
14350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32) |
14351 | #[inline ] |
14352 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14353 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14354 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14355 | pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i { |
14356 | _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a) |
14357 | } |
14358 | |
14359 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14360 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14361 | /// |
14362 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32) |
14363 | #[inline ] |
14364 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14365 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14366 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14367 | pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14368 | unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) } |
14369 | } |
14370 | |
14371 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14372 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14373 | /// |
14374 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32) |
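///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and a CPU with AVX512-FP16 and
/// AVX512VL (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 8.0, 6.0, 4.0, 2.0);
///     // Only lane 0 is converted; the other three 32-bit lanes are zeroed.
///     let r = _mm_maskz_cvtph_epu32(0b0001, a);
///     let lanes: [u32; 4] = unsafe { core::mem::transmute(r) };
///     assert_eq!(lanes, [2, 0, 0, 0]);
/// }
/// ```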
14375 | #[inline ] |
14376 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14377 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14378 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14379 | pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i { |
14380 | _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a) |
14381 | } |
14382 | |
14383 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14384 | /// the results in dst. |
14385 | /// |
14386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32) |
14387 | #[inline ] |
14388 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14389 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14390 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14391 | pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i { |
14392 | _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a) |
14393 | } |
14394 | |
14395 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14396 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14397 | /// |
14398 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32) |
14399 | #[inline ] |
14400 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14401 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14402 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14403 | pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14404 | unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) } |
14405 | } |
14406 | |
14407 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14408 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14409 | /// |
14410 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32) |
14411 | #[inline ] |
14412 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14413 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14414 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14415 | pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i { |
14416 | _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a) |
14417 | } |
14418 | |
14419 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14420 | /// the results in dst. |
14421 | /// |
14422 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32) |
14423 | #[inline ] |
14424 | #[target_feature (enable = "avx512fp16" )] |
14425 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14426 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14427 | pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i { |
14428 | _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a) |
14429 | } |
14430 | |
14431 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14432 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14433 | /// |
14434 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32) |
14435 | #[inline ] |
14436 | #[target_feature (enable = "avx512fp16" )] |
14437 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14438 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14439 | pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14440 | unsafe { |
14441 | transmute(vcvtph2udq_512( |
14442 | a, |
14443 | src.as_u32x16(), |
14444 | k, |
14445 | _MM_FROUND_CUR_DIRECTION, |
14446 | )) |
14447 | } |
14448 | } |
14449 | |
14450 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14451 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14452 | /// |
14453 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32) |
14454 | #[inline ] |
14455 | #[target_feature (enable = "avx512fp16" )] |
14456 | #[cfg_attr (test, assert_instr(vcvtph2udq))] |
14457 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14458 | pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i { |
14459 | _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a) |
14460 | } |
14461 | |
14462 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14463 | /// the results in dst. |
14464 | /// |
14465 | /// Rounding is done according to the rounding parameter, which can be one of: |
14466 | /// |
14467 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14468 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14469 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14470 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14471 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14472 | /// |
14473 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32) |
14474 | #[inline ] |
14475 | #[target_feature (enable = "avx512fp16" )] |
14476 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14477 | #[rustc_legacy_const_generics (1)] |
14478 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14479 | pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i { |
14480 | static_assert_rounding!(ROUNDING); |
14481 | _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a) |
14482 | } |
14483 | |
14484 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14485 | /// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14486 | /// |
14487 | /// Rounding is done according to the rounding parameter, which can be one of: |
14488 | /// |
14489 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14490 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14491 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14492 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14493 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14494 | /// |
14495 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32) |
14496 | #[inline ] |
14497 | #[target_feature (enable = "avx512fp16" )] |
14498 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14499 | #[rustc_legacy_const_generics (3)] |
14500 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14501 | pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>( |
14502 | src: __m512i, |
14503 | k: __mmask16, |
14504 | a: __m256h, |
14505 | ) -> __m512i { |
14506 | unsafe { |
14507 | static_assert_rounding!(ROUNDING); |
14508 | transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING)) |
14509 | } |
14510 | } |
14511 | |
14512 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store |
14513 | /// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14514 | /// |
14515 | /// Rounding is done according to the rounding parameter, which can be one of: |
14516 | /// |
14517 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14518 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14519 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14520 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14521 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14522 | /// |
14523 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32) |
14524 | #[inline ] |
14525 | #[target_feature (enable = "avx512fp16" )] |
14526 | #[cfg_attr (test, assert_instr(vcvtph2udq, ROUNDING = 8))] |
14527 | #[rustc_legacy_const_generics (2)] |
14528 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14529 | pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14530 | static_assert_rounding!(ROUNDING); |
14531 | _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a) |
14532 | } |
14533 | |
14534 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store |
14535 | /// the result in dst. |
14536 | /// |
14537 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32) |
14538 | #[inline ] |
14539 | #[target_feature (enable = "avx512fp16" )] |
14540 | #[cfg_attr (test, assert_instr(vcvtsh2usi))] |
14541 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14542 | pub fn _mm_cvtsh_u32(a: __m128h) -> u32 { |
14543 | unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } |
14544 | } |
14545 | |
14546 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store |
14547 | /// the result in dst. |
14548 | /// |
14549 | /// Rounding is done according to the rounding parameter, which can be one of: |
14550 | /// |
14551 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
14552 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
14553 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
14554 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
14555 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
14556 | /// |
14557 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32) |
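///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(7.75);
///     // Force round-down; the default nearest-even mode would give 8.
///     let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(r, 7);
/// }
/// ```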
14558 | #[inline ] |
14559 | #[target_feature (enable = "avx512fp16" )] |
14560 | #[cfg_attr (test, assert_instr(vcvtsh2usi, ROUNDING = 8))] |
14561 | #[rustc_legacy_const_generics (1)] |
14562 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14563 | pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 { |
14564 | unsafe { |
14565 | static_assert_rounding!(ROUNDING); |
14566 | vcvtsh2usi32(a, ROUNDING) |
14567 | } |
14568 | } |
14569 | |
14570 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14571 | /// store the results in dst. |
14572 | /// |
14573 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32) |
14574 | #[inline ] |
14575 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14576 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14577 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14578 | pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i { |
14579 | _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a) |
14580 | } |
14581 | |
14582 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14583 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14584 | /// |
14585 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32) |
14586 | #[inline ] |
14587 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14588 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14589 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14590 | pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
14591 | unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) } |
14592 | } |
14593 | |
14594 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14595 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14596 | /// |
14597 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32) |
14598 | #[inline ] |
14599 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14600 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14601 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14602 | pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { |
14603 | _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a) |
14604 | } |
14605 | |
14606 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14607 | /// store the results in dst. |
14608 | /// |
14609 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32) |
14610 | #[inline ] |
14611 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14612 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14613 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14614 | pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { |
14615 | _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a) |
14616 | } |
14617 | |
14618 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14619 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14620 | /// |
14621 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32) |
14622 | #[inline ] |
14623 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14624 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14625 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14626 | pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
14627 | unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) } |
14628 | } |
14629 | |
14630 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14631 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14632 | /// |
14633 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32) |
14634 | #[inline ] |
14635 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14636 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14637 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14638 | pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { |
14639 | _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a) |
14640 | } |
14641 | |
14642 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14643 | /// store the results in dst. |
14644 | /// |
14645 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32) |
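///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly Rust with the
/// unstable `stdarch_x86_avx512_f16` and `f16` features and an AVX512-FP16 CPU (marked `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm256_set1_ph(-3.75);
///     // 16 half-precision inputs widen to 16 truncated 32-bit integers.
///     let r = _mm512_cvttph_epi32(a);
///     let lanes: [i32; 16] = unsafe { core::mem::transmute(r) };
///     assert!(lanes.iter().all(|&x| x == -3));
/// }
/// ```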
14646 | #[inline ] |
14647 | #[target_feature (enable = "avx512fp16" )] |
14648 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14649 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14650 | pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { |
14651 | _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a) |
14652 | } |
14653 | |
14654 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14655 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14656 | /// |
14657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32) |
14658 | #[inline ] |
14659 | #[target_feature (enable = "avx512fp16" )] |
14660 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14661 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14662 | pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14663 | unsafe { |
transmute(vcvttph2dq_512(
14665 | a, |
14666 | src.as_i32x16(), |
14667 | k, |
14668 | _MM_FROUND_CUR_DIRECTION, |
14669 | )) |
14670 | } |
14671 | } |
14672 | |
14673 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14674 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14675 | /// |
14676 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32) |
14677 | #[inline ] |
14678 | #[target_feature (enable = "avx512fp16" )] |
14679 | #[cfg_attr (test, assert_instr(vcvttph2dq))] |
14680 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14681 | pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
14683 | } |
14684 | |
14685 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14686 | /// store the results in dst. |
14687 | /// |
14688 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14689 | /// |
14690 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32) |
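///
/// A minimal sketch of passing the `SAE` parameter (illustrative only; `demo` is a hypothetical
/// helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm256_set1_ph(3.99);
///     // Truncation still applies; _MM_FROUND_NO_EXC only suppresses exception reporting.
///     let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
/// }
/// ```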
14691 | #[inline ] |
14692 | #[target_feature (enable = "avx512fp16" )] |
14693 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14694 | #[rustc_legacy_const_generics (1)] |
14695 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14696 | pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i { |
14697 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
14699 | } |
14700 | |
14701 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14702 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14703 | /// |
14704 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14705 | /// |
14706 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32) |
14707 | #[inline ] |
14708 | #[target_feature (enable = "avx512fp16" )] |
14709 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14710 | #[rustc_legacy_const_generics (3)] |
14711 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14712 | pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>( |
14713 | src: __m512i, |
14714 | k: __mmask16, |
14715 | a: __m256h, |
14716 | ) -> __m512i { |
14717 | unsafe { |
14718 | static_assert_sae!(SAE); |
transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
14720 | } |
14721 | } |
14722 | |
14723 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and |
14724 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14725 | /// |
14726 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14727 | /// |
14728 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32) |
14729 | #[inline ] |
14730 | #[target_feature (enable = "avx512fp16" )] |
14731 | #[cfg_attr (test, assert_instr(vcvttph2dq, SAE = 8))] |
14732 | #[rustc_legacy_const_generics (2)] |
14733 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14734 | pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14735 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
14737 | } |
14738 | |
14739 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store |
14740 | /// the result in dst. |
14741 | /// |
14742 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32) |
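///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(-7.9);
///     // Truncation rounds toward zero, so the result is -7.
///     let r: i32 = _mm_cvttsh_i32(a);
/// }
/// ```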
14743 | #[inline ] |
14744 | #[target_feature (enable = "avx512fp16" )] |
14745 | #[cfg_attr (test, assert_instr(vcvttsh2si))] |
14746 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14747 | pub fn _mm_cvttsh_i32(a: __m128h) -> i32 { |
14748 | unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) } |
14749 | } |
14750 | |
14751 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store |
14752 | /// the result in dst. |
14753 | /// |
14754 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14755 | /// |
14756 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32) |
14757 | #[inline ] |
14758 | #[target_feature (enable = "avx512fp16" )] |
14759 | #[cfg_attr (test, assert_instr(vcvttsh2si, SAE = 8))] |
14760 | #[rustc_legacy_const_generics (1)] |
14761 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14762 | pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 { |
14763 | unsafe { |
14764 | static_assert_sae!(SAE); |
14765 | vcvttsh2si32(a, SAE) |
14766 | } |
14767 | } |
14768 | |
14769 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14770 | /// store the results in dst. |
14771 | /// |
14772 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32) |
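///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper). Only the lower four
/// `f16` lanes of `a` participate, since the destination holds four 32-bit lanes:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set1_ph(3.5);
///     // Each of the four unsigned 32-bit result lanes holds 3.
///     let r = _mm_cvttph_epu32(a);
/// }
/// ```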
14773 | #[inline ] |
14774 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14775 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14777 | pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
14779 | } |
14780 | |
14781 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14782 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14783 | /// |
14784 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32) |
14785 | #[inline ] |
14786 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14787 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14788 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14789 | pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) }
14791 | } |
14792 | |
14793 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14794 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14795 | /// |
14796 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32) |
14797 | #[inline ] |
14798 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14799 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14800 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14801 | pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a)
14803 | } |
14804 | |
14805 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14806 | /// store the results in dst. |
14807 | /// |
14808 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32) |
14809 | #[inline ] |
14810 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14811 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14812 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14813 | pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a)
14815 | } |
14816 | |
14817 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14818 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14819 | /// |
14820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32) |
14821 | #[inline ] |
14822 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14823 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14824 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14825 | pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) }
14827 | } |
14828 | |
14829 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14830 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14831 | /// |
14832 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32) |
14833 | #[inline ] |
14834 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14835 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14836 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14837 | pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a)
14839 | } |
14840 | |
14841 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14842 | /// store the results in dst. |
14843 | /// |
14844 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32) |
14845 | #[inline ] |
14846 | #[target_feature (enable = "avx512fp16" )] |
14847 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14848 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14849 | pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a)
14851 | } |
14852 | |
14853 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14854 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14855 | /// |
14856 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32) |
14857 | #[inline ] |
14858 | #[target_feature (enable = "avx512fp16" )] |
14859 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14860 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14861 | pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { |
14862 | unsafe { |
transmute(vcvttph2udq_512(
14864 | a, |
14865 | src.as_u32x16(), |
14866 | k, |
14867 | _MM_FROUND_CUR_DIRECTION, |
14868 | )) |
14869 | } |
14870 | } |
14871 | |
14872 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14873 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14874 | /// |
14875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32) |
14876 | #[inline ] |
14877 | #[target_feature (enable = "avx512fp16" )] |
14878 | #[cfg_attr (test, assert_instr(vcvttph2udq))] |
14879 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14880 | pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { |
_mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a)
14882 | } |
14883 | |
14884 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14885 | /// store the results in dst. |
14886 | /// |
14887 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14888 | /// |
14889 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32) |
14890 | #[inline ] |
14891 | #[target_feature (enable = "avx512fp16" )] |
14892 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14893 | #[rustc_legacy_const_generics (1)] |
14894 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14895 | pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i { |
14896 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
14898 | } |
14899 | |
14900 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14901 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14902 | /// |
14903 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14904 | /// |
14905 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32) |
14906 | #[inline ] |
14907 | #[target_feature (enable = "avx512fp16" )] |
14908 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14909 | #[rustc_legacy_const_generics (3)] |
14910 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14911 | pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>( |
14912 | src: __m512i, |
14913 | k: __mmask16, |
14914 | a: __m256h, |
14915 | ) -> __m512i { |
14916 | unsafe { |
14917 | static_assert_sae!(SAE); |
transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
14919 | } |
14920 | } |
14921 | |
14922 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and |
14923 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14924 | /// |
14925 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14926 | /// |
14927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32) |
14928 | #[inline ] |
14929 | #[target_feature (enable = "avx512fp16" )] |
14930 | #[cfg_attr (test, assert_instr(vcvttph2udq, SAE = 8))] |
14931 | #[rustc_legacy_const_generics (2)] |
14932 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14933 | pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i { |
14934 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
14936 | } |
14937 | |
14938 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store |
14939 | /// the result in dst. |
14940 | /// |
14941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32) |
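///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set_sh(65504.0); // largest finite f16 value
///     // Truncating an exactly representable integer is lossless: the result is 65504.
///     let r: u32 = _mm_cvttsh_u32(a);
/// }
/// ```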
14942 | #[inline ] |
14943 | #[target_feature (enable = "avx512fp16" )] |
14944 | #[cfg_attr (test, assert_instr(vcvttsh2usi))] |
14945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14946 | pub fn _mm_cvttsh_u32(a: __m128h) -> u32 { |
14947 | unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } |
14948 | } |
14949 | |
14950 | /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store |
14951 | /// the result in dst. |
14952 | /// |
14953 | /// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter. |
14954 | /// |
14955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32) |
14956 | #[inline ] |
14957 | #[target_feature (enable = "avx512fp16" )] |
14958 | #[cfg_attr (test, assert_instr(vcvttsh2usi, SAE = 8))] |
14959 | #[rustc_legacy_const_generics (1)] |
14960 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14961 | pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 { |
14962 | unsafe { |
14963 | static_assert_sae!(SAE); |
14964 | vcvttsh2usi32(a, SAE) |
14965 | } |
14966 | } |
14967 | |
14968 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14969 | /// store the results in dst. |
14970 | /// |
14971 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64) |
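///
/// A minimal sketch (illustrative only; `demo` is a hypothetical helper). Rounding follows the
/// current MXCSR rounding mode, which defaults to round-to-nearest-even:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     let a = _mm_set1_ph(1.5);
///     // Only the lower two f16 lanes are used; 1.5 rounds to 2 in both 64-bit lanes.
///     let r = _mm_cvtph_epi64(a);
/// }
/// ```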
14972 | #[inline ] |
14973 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14974 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14975 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14976 | pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i { |
_mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a)
14978 | } |
14979 | |
14980 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14981 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
14982 | /// |
14983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64) |
14984 | #[inline ] |
14985 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14986 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14987 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
14988 | pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) }
14990 | } |
14991 | |
14992 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
14993 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
14994 | /// |
14995 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64) |
14996 | #[inline ] |
14997 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
14998 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
14999 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15000 | pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a)
15002 | } |
15003 | |
15004 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15005 | /// store the results in dst. |
15006 | /// |
15007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64) |
15008 | #[inline ] |
15009 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15010 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15011 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15012 | pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a)
15014 | } |
15015 | |
15016 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15017 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15018 | /// |
15019 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64) |
15020 | #[inline ] |
15021 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15022 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15023 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15024 | pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) }
15026 | } |
15027 | |
15028 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15029 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15030 | /// |
15031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64) |
15032 | #[inline ] |
15033 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15034 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15035 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15036 | pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a)
15038 | } |
15039 | |
15040 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15041 | /// store the results in dst. |
15042 | /// |
15043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64) |
15044 | #[inline ] |
15045 | #[target_feature (enable = "avx512fp16" )] |
15046 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15048 | pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a)
15050 | } |
15051 | |
15052 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15053 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15054 | /// |
15055 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64) |
15056 | #[inline ] |
15057 | #[target_feature (enable = "avx512fp16" )] |
15058 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15059 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15060 | pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15061 | unsafe { |
transmute(vcvtph2qq_512(
15063 | a, |
15064 | src.as_i64x8(), |
15065 | k, |
15066 | _MM_FROUND_CUR_DIRECTION, |
15067 | )) |
15068 | } |
15069 | } |
15070 | |
15071 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15072 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15073 | /// |
15074 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64) |
15075 | #[inline ] |
15076 | #[target_feature (enable = "avx512fp16" )] |
15077 | #[cfg_attr (test, assert_instr(vcvtph2qq))] |
15078 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15079 | pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a)
15081 | } |
15082 | |
15083 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15084 | /// store the results in dst. |
15085 | /// |
15086 | /// Rounding is done according to the rounding parameter, which can be one of: |
15087 | /// |
15088 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15089 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15090 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15091 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15092 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15093 | /// |
15094 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64) |
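///
/// A minimal sketch of combining a rounding mode with exception suppression (illustrative only;
/// `demo` is a hypothetical helper):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() {
///     let a = _mm_set1_ph(2.5);
///     // Round toward negative infinity: every 64-bit lane holds 2.
///     let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// }
/// ```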
15095 | #[inline ] |
15096 | #[target_feature (enable = "avx512fp16" )] |
15097 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15098 | #[rustc_legacy_const_generics (1)] |
15099 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15100 | pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i { |
15101 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
15103 | } |
15104 | |
15105 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15106 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15107 | /// |
15108 | /// Rounding is done according to the rounding parameter, which can be one of: |
15109 | /// |
15110 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15111 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15112 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15113 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15114 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15115 | /// |
15116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64) |
15117 | #[inline ] |
15118 | #[target_feature (enable = "avx512fp16" )] |
15119 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15120 | #[rustc_legacy_const_generics (3)] |
15121 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15122 | pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>( |
15123 | src: __m512i, |
15124 | k: __mmask8, |
15125 | a: __m128h, |
15126 | ) -> __m512i { |
15127 | unsafe { |
15128 | static_assert_rounding!(ROUNDING); |
transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15130 | } |
15131 | } |
15132 | |
15133 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and |
15134 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15135 | /// |
15136 | /// Rounding is done according to the rounding parameter, which can be one of: |
15137 | /// |
15138 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15139 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15140 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15141 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15142 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15143 | /// |
15144 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64) |
15145 | #[inline ] |
15146 | #[target_feature (enable = "avx512fp16" )] |
15147 | #[cfg_attr (test, assert_instr(vcvtph2qq, ROUNDING = 8))] |
15148 | #[rustc_legacy_const_generics (2)] |
15149 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15150 | pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15151 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
15153 | } |
15154 | |
15155 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15156 | /// store the results in dst. |
15157 | /// |
15158 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64) |
15159 | #[inline ] |
15160 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15161 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15162 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15163 | pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i { |
_mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a)
15165 | } |
15166 | |
15167 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15168 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15169 | /// |
15170 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64) |
15171 | #[inline ] |
15172 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15173 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15174 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15175 | pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15177 | } |
15178 | |
15179 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15180 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15181 | /// |
15182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64) |
15183 | #[inline ] |
15184 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15185 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15186 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15187 | pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a)
15189 | } |
15190 | |
15191 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15192 | /// store the results in dst. |
15193 | /// |
15194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64) |
15195 | #[inline ] |
15196 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15197 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15198 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15199 | pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a)
15201 | } |
15202 | |
15203 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15204 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15205 | /// |
15206 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64) |
15207 | #[inline ] |
15208 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15209 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15210 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15211 | pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15213 | } |
15214 | |
15215 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15216 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15217 | /// |
15218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64) |
15219 | #[inline ] |
15220 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15221 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15222 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15223 | pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a)
15225 | } |
15226 | |
15227 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15228 | /// store the results in dst. |
15229 | /// |
15230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64) |
15231 | #[inline ] |
15232 | #[target_feature (enable = "avx512fp16" )] |
15233 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15234 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15235 | pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a)
15237 | } |
15238 | |
15239 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15240 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15241 | /// |
15242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64) |
15243 | #[inline ] |
15244 | #[target_feature (enable = "avx512fp16" )] |
15245 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15246 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15247 | pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15248 | unsafe { |
transmute(vcvtph2uqq_512(
15250 | a, |
15251 | src.as_u64x8(), |
15252 | k, |
15253 | _MM_FROUND_CUR_DIRECTION, |
15254 | )) |
15255 | } |
15256 | } |
15257 | |
15258 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15259 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15260 | /// |
15261 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64) |
15262 | #[inline ] |
15263 | #[target_feature (enable = "avx512fp16" )] |
15264 | #[cfg_attr (test, assert_instr(vcvtph2uqq))] |
15265 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15266 | pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a)
15268 | } |
15269 | |
15270 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15271 | /// store the results in dst. |
15272 | /// |
15273 | /// Rounding is done according to the rounding parameter, which can be one of: |
15274 | /// |
15275 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15276 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15277 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15278 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15279 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15280 | /// |
15281 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64) |
15282 | #[inline ] |
15283 | #[target_feature (enable = "avx512fp16" )] |
15284 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15285 | #[rustc_legacy_const_generics (1)] |
15286 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15287 | pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i { |
15288 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
15290 | } |
15291 | |
15292 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15293 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15294 | /// |
15295 | /// Rounding is done according to the rounding parameter, which can be one of: |
15296 | /// |
15297 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15298 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15299 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15300 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15301 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15302 | /// |
15303 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64) |
15304 | #[inline ] |
15305 | #[target_feature (enable = "avx512fp16" )] |
15306 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15307 | #[rustc_legacy_const_generics (3)] |
15308 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15309 | pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>( |
15310 | src: __m512i, |
15311 | k: __mmask8, |
15312 | a: __m128h, |
15313 | ) -> __m512i { |
15314 | unsafe { |
15315 | static_assert_rounding!(ROUNDING); |
transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15317 | } |
15318 | } |
15319 | |
15320 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and |
15321 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15322 | /// |
15323 | /// Rounding is done according to the rounding parameter, which can be one of: |
15324 | /// |
15325 | /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
15326 | /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
15327 | /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
15328 | /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
15329 | /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
15330 | /// |
15331 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64) |
15332 | #[inline ] |
15333 | #[target_feature (enable = "avx512fp16" )] |
15334 | #[cfg_attr (test, assert_instr(vcvtph2uqq, ROUNDING = 8))] |
15335 | #[rustc_legacy_const_generics (2)] |
15336 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15337 | pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15338 | static_assert_rounding!(ROUNDING); |
_mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
15340 | } |
15341 | |
15342 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15343 | /// store the results in dst. |
15344 | /// |
15345 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64) |
15346 | #[inline ] |
15347 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15348 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15349 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15350 | pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a)
15352 | } |
15353 | |
15354 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15355 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15356 | /// |
15357 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64) |
15358 | #[inline ] |
15359 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15360 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15361 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15362 | pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) }
15364 | } |
15365 | |
15366 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15367 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15368 | /// |
15369 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64) |
15370 | #[inline ] |
15371 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15372 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15373 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15374 | pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a)
15376 | } |
15377 | |
15378 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15379 | /// store the results in dst. |
15380 | /// |
15381 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64) |
15382 | #[inline ] |
15383 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15384 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15385 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15386 | pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a)
15388 | } |
15389 | |
15390 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15391 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15392 | /// |
15393 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64) |
15394 | #[inline ] |
15395 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15396 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15397 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15398 | pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) }
15400 | } |
15401 | |
15402 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15403 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15404 | /// |
15405 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64) |
15406 | #[inline ] |
15407 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15408 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15409 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15410 | pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a)
15412 | } |
15413 | |
15414 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15415 | /// store the results in dst. |
15416 | /// |
15417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64) |
15418 | #[inline ] |
15419 | #[target_feature (enable = "avx512fp16" )] |
15420 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15421 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15422 | pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a)
15424 | } |
15425 | |
15426 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15427 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15428 | /// |
15429 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64) |
15430 | #[inline ] |
15431 | #[target_feature (enable = "avx512fp16" )] |
15432 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15433 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15434 | pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15435 | unsafe { |
transmute(vcvttph2qq_512(
15437 | a, |
15438 | src.as_i64x8(), |
15439 | k, |
15440 | _MM_FROUND_CUR_DIRECTION, |
15441 | )) |
15442 | } |
15443 | } |
15444 | |
15445 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15446 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15447 | /// |
15448 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64) |
15449 | #[inline ] |
15450 | #[target_feature (enable = "avx512fp16" )] |
15451 | #[cfg_attr (test, assert_instr(vcvttph2qq))] |
15452 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15453 | pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a)
15455 | } |
15456 | |
15457 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15458 | /// store the results in dst. |
15459 | /// |
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15461 | /// |
15462 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64) |
15463 | #[inline ] |
15464 | #[target_feature (enable = "avx512fp16" )] |
15465 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15466 | #[rustc_legacy_const_generics (1)] |
15467 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15468 | pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i { |
15469 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
15471 | } |
15472 | |
15473 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15474 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15475 | /// |
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15477 | /// |
15478 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64) |
15479 | #[inline ] |
15480 | #[target_feature (enable = "avx512fp16" )] |
15481 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15482 | #[rustc_legacy_const_generics (3)] |
15483 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15484 | pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>( |
15485 | src: __m512i, |
15486 | k: __mmask8, |
15487 | a: __m128h, |
15488 | ) -> __m512i { |
15489 | unsafe { |
15490 | static_assert_sae!(SAE); |
transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
15492 | } |
15493 | } |
15494 | |
15495 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and |
15496 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15497 | /// |
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15499 | /// |
15500 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64) |
15501 | #[inline ] |
15502 | #[target_feature (enable = "avx512fp16" )] |
15503 | #[cfg_attr (test, assert_instr(vcvttph2qq, SAE = 8))] |
15504 | #[rustc_legacy_const_generics (2)] |
15505 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15506 | pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15507 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a)
15509 | } |
15510 | |
15511 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15512 | /// store the results in dst. |
15513 | /// |
15514 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64) |
15515 | #[inline ] |
15516 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15517 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15518 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15519 | pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a)
15521 | } |
15522 | |
15523 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15524 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15525 | /// |
15526 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64) |
15527 | #[inline ] |
15528 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15529 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15530 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15531 | pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { |
unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) }
15533 | } |
15534 | |
15535 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15536 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15537 | /// |
15538 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64) |
15539 | #[inline ] |
15540 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15541 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15542 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15543 | pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i { |
_mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a)
15545 | } |
15546 | |
15547 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15548 | /// store the results in dst. |
15549 | /// |
15550 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64) |
15551 | #[inline ] |
15552 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15553 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15554 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15555 | pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a)
15557 | } |
15558 | |
15559 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15560 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15561 | /// |
15562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64) |
15563 | #[inline ] |
15564 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15565 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15566 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15567 | pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { |
unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) }
15569 | } |
15570 | |
15571 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15572 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15573 | /// |
15574 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64) |
15575 | #[inline ] |
15576 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15577 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15578 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15579 | pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { |
_mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a)
15581 | } |
15582 | |
15583 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15584 | /// store the results in dst. |
15585 | /// |
15586 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64) |
15587 | #[inline ] |
15588 | #[target_feature (enable = "avx512fp16" )] |
15589 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15590 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15591 | pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a)
15593 | } |
15594 | |
15595 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15596 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15597 | /// |
15598 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64) |
15599 | #[inline ] |
15600 | #[target_feature (enable = "avx512fp16" )] |
15601 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15602 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15603 | pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { |
15604 | unsafe { |
transmute(vcvttph2uqq_512(
15606 | a, |
15607 | src.as_u64x8(), |
15608 | k, |
15609 | _MM_FROUND_CUR_DIRECTION, |
15610 | )) |
15611 | } |
15612 | } |
15613 | |
15614 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15615 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15616 | /// |
15617 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64) |
15618 | #[inline ] |
15619 | #[target_feature (enable = "avx512fp16" )] |
15620 | #[cfg_attr (test, assert_instr(vcvttph2uqq))] |
15621 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15622 | pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { |
_mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a)
15624 | } |
15625 | |
15626 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15627 | /// store the results in dst. |
15628 | /// |
15629 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15630 | /// |
15631 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64) |
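///
/// A hedged sketch of passing the SAE constant (illustrative values only):
///
/// ```ignore
/// let a = _mm_set1_ph(9.9);
/// let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
/// // every u64 lane holds 9 (truncated), and floating-point exceptions are suppressed
/// ```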
15632 | #[inline ] |
15633 | #[target_feature (enable = "avx512fp16" )] |
15634 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15635 | #[rustc_legacy_const_generics (1)] |
15636 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15637 | pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i { |
15638 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
15640 | } |
15641 | |
15642 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15643 | /// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
15644 | /// |
15645 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15646 | /// |
15647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64) |
15648 | #[inline ] |
15649 | #[target_feature (enable = "avx512fp16" )] |
15650 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15651 | #[rustc_legacy_const_generics (3)] |
15652 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15653 | pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>( |
15654 | src: __m512i, |
15655 | k: __mmask8, |
15656 | a: __m128h, |
15657 | ) -> __m512i { |
15658 | unsafe { |
15659 | static_assert_sae!(SAE); |
transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15661 | } |
15662 | } |
15663 | |
15664 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and |
15665 | /// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
15666 | /// |
15667 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15668 | /// |
15669 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64) |
15670 | #[inline ] |
15671 | #[target_feature (enable = "avx512fp16" )] |
15672 | #[cfg_attr (test, assert_instr(vcvttph2uqq, SAE = 8))] |
15673 | #[rustc_legacy_const_generics (2)] |
15674 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15675 | pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i { |
15676 | static_assert_sae!(SAE); |
_mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
15678 | } |
15679 | |
15680 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15681 | /// floating-point elements, and store the results in dst. |
15682 | /// |
15683 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps) |
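///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_cvtxph_ps(a);
/// // r = [1.0f32, 2.0, 3.0, 4.0], converted from the lower four half-precision lanes of a
/// ```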
15684 | #[inline ] |
15685 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15686 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15687 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15688 | pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 { |
_mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a)
15690 | } |
15691 | |
15692 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15693 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15694 | /// dst when the corresponding mask bit is not set). |
15695 | /// |
15696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps) |
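///
/// A sketch of the writemask behavior (illustrative values only):
///
/// ```ignore
/// let src = _mm_set1_ps(-1.0);
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
/// // lanes 0 and 2 are converted (1.0 and 3.0); lanes 1 and 3 keep -1.0 from src
/// ```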
15697 | #[inline ] |
15698 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15699 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15700 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15701 | pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 { |
15702 | unsafe { vcvtph2psx_128(a, src, k) } |
15703 | } |
15704 | |
15705 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15706 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15707 | /// corresponding mask bit is not set). |
15708 | /// |
15709 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps) |
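///
/// A sketch of the zeromask behavior (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_maskz_cvtxph_ps(0b0011, a);
/// // r = [1.0f32, 2.0, 0.0, 0.0]; lanes with a cleared mask bit are zeroed
/// ```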
15710 | #[inline ] |
15711 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15712 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15713 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15714 | pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { |
_mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
15716 | } |
15717 | |
15718 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15719 | /// floating-point elements, and store the results in dst. |
15720 | /// |
15721 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps) |
15722 | #[inline ] |
15723 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15724 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15725 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15726 | pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { |
_mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
15728 | } |
15729 | |
15730 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15731 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15732 | /// dst when the corresponding mask bit is not set). |
15733 | /// |
15734 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps) |
15735 | #[inline ] |
15736 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15737 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15738 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15739 | pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 { |
15740 | unsafe { vcvtph2psx_256(a, src, k) } |
15741 | } |
15742 | |
15743 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15744 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15745 | /// corresponding mask bit is not set). |
15746 | /// |
15747 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps) |
15748 | #[inline ] |
15749 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15750 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15751 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15752 | pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { |
_mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
15754 | } |
15755 | |
15756 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15757 | /// floating-point elements, and store the results in dst. |
15758 | /// |
15759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps) |
15760 | #[inline ] |
15761 | #[target_feature (enable = "avx512fp16" )] |
15762 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15763 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15764 | pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { |
_mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
15766 | } |
15767 | |
15768 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15769 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15770 | /// dst when the corresponding mask bit is not set). |
15771 | /// |
15772 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps) |
15773 | #[inline ] |
15774 | #[target_feature (enable = "avx512fp16" )] |
15775 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15776 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15777 | pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 { |
15778 | unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
15779 | } |
15780 | |
15781 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15782 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15783 | /// corresponding mask bit is not set). |
15784 | /// |
15785 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps) |
15786 | #[inline ] |
15787 | #[target_feature (enable = "avx512fp16" )] |
15788 | #[cfg_attr (test, assert_instr(vcvtph2psx))] |
15789 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15790 | pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { |
_mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
15792 | } |
15793 | |
15794 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15795 | /// floating-point elements, and store the results in dst. |
15796 | /// |
15797 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15798 | /// |
15799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps) |
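///
/// A hedged sketch of suppressing exceptions via the SAE constant (illustrative values only):
///
/// ```ignore
/// let a = _mm256_set1_ph(3.5);
/// let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
/// // all sixteen single-precision lanes hold 3.5
/// ```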
15800 | #[inline ] |
15801 | #[target_feature (enable = "avx512fp16" )] |
15802 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15803 | #[rustc_legacy_const_generics (1)] |
15804 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15805 | pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 { |
15806 | static_assert_sae!(SAE); |
_mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
15808 | } |
15809 | |
15810 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15811 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15812 | /// dst when the corresponding mask bit is not set). |
15813 | /// |
15814 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15815 | /// |
15816 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps) |
15817 | #[inline ] |
15818 | #[target_feature (enable = "avx512fp16" )] |
15819 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15820 | #[rustc_legacy_const_generics (3)] |
15821 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15822 | pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>( |
15823 | src: __m512, |
15824 | k: __mmask16, |
15825 | a: __m256h, |
15826 | ) -> __m512 { |
15827 | unsafe { |
15828 | static_assert_sae!(SAE); |
15829 | vcvtph2psx_512(a, src, k, SAE) |
15830 | } |
15831 | } |
15832 | |
15833 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) |
15834 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15835 | /// corresponding mask bit is not set). |
15836 | /// |
15837 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15838 | /// |
15839 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps) |
15840 | #[inline ] |
15841 | #[target_feature (enable = "avx512fp16" )] |
15842 | #[cfg_attr (test, assert_instr(vcvtph2psx, SAE = 8))] |
15843 | #[rustc_legacy_const_generics (2)] |
15844 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15845 | pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 { |
15846 | static_assert_sae!(SAE); |
_mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
15848 | } |
15849 | |
15850 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15851 | /// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed |
15852 | /// elements from a to the upper elements of dst. |
15853 | /// |
15854 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss) |
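///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// let b = _mm_set_sh(1.5);
/// let r = _mm_cvtsh_ss(a, b);
/// // r = [1.5f32, 20.0, 30.0, 40.0]: the low lane comes from b, the rest from a
/// ```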
15855 | #[inline ] |
15856 | #[target_feature (enable = "avx512fp16" )] |
15857 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15858 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15859 | pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { |
_mm_mask_cvtsh_ss(a, 0xff, a, b)
15861 | } |
15862 | |
15863 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15864 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
15865 | /// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the |
15866 | /// upper elements of dst. |
15867 | /// |
15868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss) |
15869 | #[inline ] |
15870 | #[target_feature (enable = "avx512fp16" )] |
15871 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15872 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15873 | pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
15874 | unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
15875 | } |
15876 | |
15877 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15878 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
15879 | /// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements |
15880 | /// of dst. |
15881 | /// |
15882 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss) |
15883 | #[inline ] |
15884 | #[target_feature (enable = "avx512fp16" )] |
15885 | #[cfg_attr (test, assert_instr(vcvtsh2ss))] |
15886 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15887 | pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
_mm_mask_cvtsh_ss(_mm_setzero_ps(), k, a, b)
15889 | } |
15890 | |
15891 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15892 | /// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements |
15893 | /// from a to the upper elements of dst. |
15894 | /// |
15895 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15896 | /// |
15897 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss) |
15898 | #[inline ] |
15899 | #[target_feature (enable = "avx512fp16" )] |
15900 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15901 | #[rustc_legacy_const_generics (2)] |
15902 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15903 | pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 { |
15904 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
15906 | } |
15907 | |
15908 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15909 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
15910 | /// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the |
15911 | /// upper elements of dst. |
15912 | /// |
15913 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15914 | /// |
15915 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss) |
15916 | #[inline ] |
15917 | #[target_feature (enable = "avx512fp16" )] |
15918 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15919 | #[rustc_legacy_const_generics (4)] |
15920 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15921 | pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>( |
15922 | src: __m128, |
15923 | k: __mmask8, |
15924 | a: __m128, |
15925 | b: __m128h, |
15926 | ) -> __m128 { |
15927 | unsafe { |
15928 | static_assert_sae!(SAE); |
15929 | vcvtsh2ss(a, b, src, k, SAE) |
15930 | } |
15931 | } |
15932 | |
15933 | /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) |
15934 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
15935 | /// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements |
15936 | /// of dst. |
15937 | /// |
15938 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
15939 | /// |
15940 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss) |
15941 | #[inline ] |
15942 | #[target_feature (enable = "avx512fp16" )] |
15943 | #[cfg_attr (test, assert_instr(vcvtsh2ss, SAE = 8))] |
15944 | #[rustc_legacy_const_generics (3)] |
15945 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15946 | pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 { |
15947 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_ss::<SAE>(_mm_setzero_ps(), k, a, b)
15949 | } |
15950 | |
15951 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15952 | /// floating-point elements, and store the results in dst. |
15953 | /// |
15954 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd) |
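///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, 1.25);
/// let r = _mm_cvtph_pd(a);
/// // r = [1.25f64, 2.5f64], converted from the two lowest half-precision lanes of a
/// ```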
15955 | #[inline ] |
15956 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15957 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15958 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15959 | pub fn _mm_cvtph_pd(a: __m128h) -> __m128d { |
_mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
15961 | } |
15962 | |
15963 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15964 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
15965 | /// dst when the corresponding mask bit is not set). |
15966 | /// |
15967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd) |
15968 | #[inline ] |
15969 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15970 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15971 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15972 | pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d { |
15973 | unsafe { vcvtph2pd_128(a, src, k) } |
15974 | } |
15975 | |
15976 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15977 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
15978 | /// corresponding mask bit is not set). |
15979 | /// |
15980 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd) |
15981 | #[inline ] |
15982 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15983 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15984 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15985 | pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { |
_mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
15987 | } |
15988 | |
15989 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
15990 | /// floating-point elements, and store the results in dst. |
15991 | /// |
15992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd) |
15993 | #[inline ] |
15994 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
15995 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
15996 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
15997 | pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d { |
_mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
15999 | } |
16000 | |
16001 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16002 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16003 | /// dst when the corresponding mask bit is not set). |
16004 | /// |
16005 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd) |
16006 | #[inline ] |
16007 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
16008 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16009 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16010 | pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d { |
16011 | unsafe { vcvtph2pd_256(a, src, k) } |
16012 | } |
16013 | |
16014 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16015 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16016 | /// corresponding mask bit is not set). |
16017 | /// |
16018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd) |
16019 | #[inline ] |
16020 | #[target_feature (enable = "avx512fp16,avx512vl" )] |
16021 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16022 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16023 | pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { |
_mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
16025 | } |
16026 | |
16027 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16028 | /// floating-point elements, and store the results in dst. |
16029 | /// |
16030 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd) |
16031 | #[inline ] |
16032 | #[target_feature (enable = "avx512fp16" )] |
16033 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16034 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16035 | pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d { |
_mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
16037 | } |
16038 | |
16039 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16040 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16041 | /// dst when the corresponding mask bit is not set). |
16042 | /// |
16043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd) |
16044 | #[inline ] |
16045 | #[target_feature (enable = "avx512fp16" )] |
16046 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16047 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16048 | pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d { |
16049 | unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } |
16050 | } |
16051 | |
16052 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16053 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16054 | /// corresponding mask bit is not set). |
16055 | /// |
16056 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd) |
16057 | #[inline ] |
16058 | #[target_feature (enable = "avx512fp16" )] |
16059 | #[cfg_attr (test, assert_instr(vcvtph2pd))] |
16060 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16061 | pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { |
_mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
16063 | } |
16064 | |
16065 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16066 | /// floating-point elements, and store the results in dst. |
16067 | /// |
16068 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16069 | /// |
16070 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd) |
16071 | #[inline ] |
16072 | #[target_feature (enable = "avx512fp16" )] |
16073 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16074 | #[rustc_legacy_const_generics (1)] |
16075 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16076 | pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d { |
16077 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
16079 | } |
16080 | |
16081 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16082 | /// floating-point elements, and store the results in dst using writemask k (elements are copied from src to |
16083 | /// dst when the corresponding mask bit is not set). |
16084 | /// |
16085 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16086 | /// |
16087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd) |
16088 | #[inline ] |
16089 | #[target_feature (enable = "avx512fp16" )] |
16090 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16091 | #[rustc_legacy_const_generics (3)] |
16092 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16093 | pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>( |
16094 | src: __m512d, |
16095 | k: __mmask8, |
16096 | a: __m128h, |
16097 | ) -> __m512d { |
16098 | unsafe { |
16099 | static_assert_sae!(SAE); |
16100 | vcvtph2pd_512(a, src, k, SAE) |
16101 | } |
16102 | } |
16103 | |
16104 | /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) |
16105 | /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the |
16106 | /// corresponding mask bit is not set). |
16107 | /// |
16108 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16109 | /// |
16110 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd) |
16111 | #[inline ] |
16112 | #[target_feature (enable = "avx512fp16" )] |
16113 | #[cfg_attr (test, assert_instr(vcvtph2pd, SAE = 8))] |
16114 | #[rustc_legacy_const_generics (2)] |
16115 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16116 | pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d { |
16117 | static_assert_sae!(SAE); |
_mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
16119 | } |
16120 | |
16121 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16122 | /// floating-point element, store the result in the lower element of dst, and copy the upper element |
16123 | /// from a to the upper element of dst. |
16124 | /// |
16125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd) |
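///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_setr_pd(10.0, 20.0);
/// let b = _mm_set_sh(0.5);
/// let r = _mm_cvtsh_sd(a, b);
/// // r = [0.5f64, 20.0]: the low lane comes from b, the high lane from a
/// ```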
16126 | #[inline ] |
16127 | #[target_feature (enable = "avx512fp16" )] |
16128 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16129 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16130 | pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { |
_mm_mask_cvtsh_sd(a, 0xff, a, b)
16132 | } |
16133 | |
16134 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16135 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
16136 | /// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element |
16137 | /// of dst. |
16138 | /// |
16139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd) |
16140 | #[inline ] |
16141 | #[target_feature (enable = "avx512fp16" )] |
16142 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16143 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16144 | pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
16145 | unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } |
16146 | } |
16147 | |
16148 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16149 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
16150 | /// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. |
16151 | /// |
16152 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd) |
16153 | #[inline ] |
16154 | #[target_feature (enable = "avx512fp16" )] |
16155 | #[cfg_attr (test, assert_instr(vcvtsh2sd))] |
16156 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16157 | pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
_mm_mask_cvtsh_sd(_mm_setzero_pd(), k, a, b)
16159 | } |
16160 | |
16161 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16162 | /// floating-point element, store the result in the lower element of dst, and copy the upper element from a |
16163 | /// to the upper element of dst. |
16164 | /// |
16165 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16166 | /// |
16167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd) |
16168 | #[inline ] |
16169 | #[target_feature (enable = "avx512fp16" )] |
16170 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16171 | #[rustc_legacy_const_generics (2)] |
16172 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16173 | pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d { |
16174 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
16176 | } |
16177 | |
16178 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16179 | /// floating-point element, store the result in the lower element of dst using writemask k (the element is |
16180 | /// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element |
16181 | /// of dst. |
16182 | /// |
16183 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16184 | /// |
16185 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd) |
16186 | #[inline ] |
16187 | #[target_feature (enable = "avx512fp16" )] |
16188 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16189 | #[rustc_legacy_const_generics (4)] |
16190 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16191 | pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>( |
16192 | src: __m128d, |
16193 | k: __mmask8, |
16194 | a: __m128d, |
16195 | b: __m128h, |
16196 | ) -> __m128d { |
16197 | unsafe { |
16198 | static_assert_sae!(SAE); |
16199 | vcvtsh2sd(a, b, src, k, SAE) |
16200 | } |
16201 | } |
16202 | |
16203 | /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) |
16204 | /// floating-point element, store the result in the lower element of dst using zeromask k (the element is |
16205 | /// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. |
16206 | /// |
16207 | /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
16208 | /// |
16209 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd) |
16210 | #[inline ] |
16211 | #[target_feature (enable = "avx512fp16" )] |
16212 | #[cfg_attr (test, assert_instr(vcvtsh2sd, SAE = 8))] |
16213 | #[rustc_legacy_const_generics (3)] |
16214 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16215 | pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { |
16216 | static_assert_sae!(SAE); |
_mm_mask_cvt_roundsh_sd::<SAE>(_mm_setzero_pd(), k, a, b)
16218 | } |
16219 | |
16220 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16221 | /// |
16222 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h) |
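///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let x: f16 = _mm_cvtsh_h(a);
/// // x == 1.0, the value of the lowest lane
/// ```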
16223 | #[inline ] |
16224 | #[target_feature (enable = "avx512fp16" )] |
16225 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16226 | pub fn _mm_cvtsh_h(a: __m128h) -> f16 { |
16227 | unsafe { simd_extract!(a, 0) } |
16228 | } |
16229 | |
16230 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16231 | /// |
16232 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h) |
16233 | #[inline ] |
16234 | #[target_feature (enable = "avx512fp16" )] |
16235 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16236 | pub fn _mm256_cvtsh_h(a: __m256h) -> f16 { |
16237 | unsafe { simd_extract!(a, 0) } |
16238 | } |
16239 | |
16240 | /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. |
16241 | /// |
16242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h) |
16243 | #[inline ] |
16244 | #[target_feature (enable = "avx512fp16" )] |
16245 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16246 | pub fn _mm512_cvtsh_h(a: __m512h) -> f16 { |
16247 | unsafe { simd_extract!(a, 0) } |
16248 | } |
16249 | |
16250 | /// Copy the lower 16-bit integer in a to dst. |
16251 | /// |
16252 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16) |
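///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
/// let x = _mm_cvtsi128_si16(a);
/// // x == 1, the 16-bit integer in the lowest lane
/// ```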
16253 | #[inline ] |
16254 | #[target_feature (enable = "avx512fp16" )] |
16255 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16256 | pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 { |
16257 | unsafe { simd_extract!(a.as_i16x8(), 0) } |
16258 | } |
16259 | |
16260 | /// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst. |
16261 | /// |
16262 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128) |
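///
/// A minimal usage sketch (illustrative values only):
///
/// ```ignore
/// let v = _mm_cvtsi16_si128(7);
/// // lane 0 of v holds 7; lanes 1 through 7 are zero
/// ```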
16263 | #[inline ] |
16264 | #[target_feature (enable = "avx512fp16" )] |
16265 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
16266 | pub fn _mm_cvtsi16_si128(a: i16) -> __m128i { |
unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
16268 | } |
16269 | |
16270 | #[allow (improper_ctypes)] |
16271 | unsafe extern "C" { |
16272 | #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh" ] |
16273 | unsafefn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8; |
16274 | #[link_name = "llvm.x86.avx512fp16.vcomi.sh" ] |
16275 | unsafefn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32; |
16276 | |
16277 | #[link_name = "llvm.x86.avx512fp16.add.ph.512" ] |
16278 | unsafefn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16279 | #[link_name = "llvm.x86.avx512fp16.sub.ph.512" ] |
16280 | unsafefn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16281 | #[link_name = "llvm.x86.avx512fp16.mul.ph.512" ] |
16282 | unsafefn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16283 | #[link_name = "llvm.x86.avx512fp16.div.ph.512" ] |
16284 | unsafefn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h; |
16285 | |
16286 | #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round" ] |
16287 | unsafefn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16288 | #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round" ] |
16289 | unsafefn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16290 | #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round" ] |
16291 | unsafefn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16292 | #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round" ] |
16293 | unsafefn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16294 | |
16295 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128" ] |
16296 | unsafefn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128; |
16297 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256" ] |
16298 | unsafefn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256; |
16299 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512" ] |
16300 | unsafefn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512; |
16301 | #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh" ] |
16302 | unsafefn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128; |
16303 | |
16304 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128" ] |
16305 | unsafefn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128; |
16306 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256" ] |
16307 | unsafefn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256; |
16308 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512" ] |
16309 | unsafefn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512; |
16310 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh" ] |
16311 | unsafefn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128; |
16312 | |
16313 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128" ] |
16314 | unsafefn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16315 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128" ] |
16316 | unsafefn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16317 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256" ] |
16318 | unsafefn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16319 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256" ] |
16320 | unsafefn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16321 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512" ] |
16322 | unsafefn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; |
16323 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512" ] |
16324 | unsafefn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512; |
16325 | #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh" ] |
16326 | unsafefn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16327 | #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh" ] |
16328 | unsafefn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16329 | |
16330 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128" ] |
16331 | unsafefn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16332 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128" ] |
16333 | unsafefn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128; |
16334 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256" ] |
16335 | unsafefn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16336 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256" ] |
16337 | unsafefn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256; |
16338 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512" ] |
16339 | unsafefn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) |
16340 | -> __m512; |
16341 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512" ] |
16342 | unsafefn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) |
16343 | -> __m512; |
16344 | #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh" ] |
16345 | unsafefn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16346 | #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh" ] |
16347 | unsafefn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128; |
16348 | |
16349 | #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512" ] |
16350 | unsafefn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; |
16351 | #[link_name = "llvm.x86.avx512fp16.vfmadd.f16" ] |
16352 | unsafefn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16; |
16353 | |
16354 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128" ] |
16355 | unsafefn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h; |
16356 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256" ] |
16357 | unsafefn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h; |
16358 | #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512" ] |
16359 | unsafefn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h; |
16360 | |
16361 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128" ] |
16362 | unsafefn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16363 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256" ] |
16364 | unsafefn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16365 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512" ] |
16366 | unsafefn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; |
16367 | #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh" ] |
16368 | unsafefn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16369 | |
16370 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128" ] |
16371 | unsafefn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16372 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256" ] |
16373 | unsafefn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16374 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512" ] |
16375 | unsafefn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h; |
16376 | #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh" ] |
16377 | unsafefn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16378 | |
16379 | #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512" ] |
16380 | unsafefn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h; |
16381 | #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh" ] |
16382 | unsafefn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16383 | |
16384 | #[link_name = "llvm.x86.avx512fp16.max.ph.128" ] |
16385 | unsafefn vmaxph_128(a: __m128h, b: __m128h) -> __m128h; |
16386 | #[link_name = "llvm.x86.avx512fp16.max.ph.256" ] |
16387 | unsafefn vmaxph_256(a: __m256h, b: __m256h) -> __m256h; |
16388 | #[link_name = "llvm.x86.avx512fp16.max.ph.512" ] |
16389 | unsafefn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; |
16390 | #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round" ] |
16391 | unsafefn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16392 | |
16393 | #[link_name = "llvm.x86.avx512fp16.min.ph.128" ] |
16394 | unsafefn vminph_128(a: __m128h, b: __m128h) -> __m128h; |
16395 | #[link_name = "llvm.x86.avx512fp16.min.ph.256" ] |
16396 | unsafefn vminph_256(a: __m256h, b: __m256h) -> __m256h; |
16397 | #[link_name = "llvm.x86.avx512fp16.min.ph.512" ] |
16398 | unsafefn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h; |
16399 | #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round" ] |
16400 | unsafefn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16401 | |
16402 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128" ] |
16403 | unsafefn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16404 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256" ] |
16405 | unsafefn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16406 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512" ] |
16407 | unsafefn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16408 | #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh" ] |
16409 | unsafefn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h; |
16410 | |
16411 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128" ] |
16412 | unsafefn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16413 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256" ] |
16414 | unsafefn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16415 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512" ] |
16416 | unsafefn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16417 | #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh" ] |
16418 | unsafefn vgetmantsh( |
16419 | a: __m128h, |
16420 | b: __m128h, |
16421 | imm8: i32, |
16422 | src: __m128h, |
16423 | k: __mmask8, |
16424 | sae: i32, |
16425 | ) -> __m128h; |
16426 | |
16427 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128" ] |
16428 | unsafefn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16429 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256" ] |
16430 | unsafefn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16431 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512" ] |
16432 | unsafefn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16433 | #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh" ] |
16434 | unsafefn vrndscalesh( |
16435 | a: __m128h, |
16436 | b: __m128h, |
16437 | src: __m128h, |
16438 | k: __mmask8, |
16439 | imm8: i32, |
16440 | sae: i32, |
16441 | ) -> __m128h; |
16442 | |
16443 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128" ] |
16444 | unsafefn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h; |
16445 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256" ] |
16446 | unsafefn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h; |
16447 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512" ] |
16448 | unsafefn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h; |
16449 | #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh" ] |
16450 | unsafefn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16451 | |
16452 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128" ] |
16453 | unsafefn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h; |
16454 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256" ] |
16455 | unsafefn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h; |
16456 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512" ] |
16457 | unsafefn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h; |
16458 | #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh" ] |
16459 | unsafefn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32) |
16460 | -> __m128h; |
16461 | |
16462 | #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh" ] |
16463 | unsafefn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8; |
16464 | |
16465 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16" ] |
16466 | unsafefn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h; |
16467 | #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16" ] |
16468 | unsafefn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h; |
16469 | #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16" ] |
16470 | unsafefn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h; |
16471 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u16" ] |
16472 | unsafefn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h; |
16473 | #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u16" ] |
16474 | unsafefn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h; |
16475 | #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32u16" ] |
16476 | unsafefn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h; |
16477 | |
16478 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128" ] |
16479 | unsafefn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h; |
16480 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32" ] |
16481 | unsafefn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h; |
16482 | #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32" ] |
16483 | unsafefn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h; |
16484 | #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh" ] |
16485 | unsafefn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h; |
16486 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128" ] |
16487 | unsafefn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h; |
16488 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u32" ] |
16489 | unsafefn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h; |
16490 | #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u32" ] |
16491 | unsafefn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h; |
16492 | #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh" ] |
16493 | unsafefn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h; |
16494 | |
16495 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128" ] |
16496 | unsafefn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h; |
16497 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256" ] |
16498 | unsafefn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h; |
16499 | #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64" ] |
16500 | unsafefn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h; |
16501 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128" ] |
16502 | unsafefn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h; |
16503 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256" ] |
16504 | unsafefn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h; |
16505 | #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u64" ] |
16506 | unsafefn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h; |
16507 | |
16508 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128" ] |
16509 | unsafefn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h; |
16510 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256" ] |
16511 | unsafefn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h; |
16512 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512" ] |
16513 | unsafefn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h; |
16514 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round" ] |
16515 | unsafefn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16516 | |
16517 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128" ] |
16518 | unsafefn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h; |
16519 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256" ] |
16520 | unsafefn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h; |
16521 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512" ] |
16522 | unsafefn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16523 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round" ] |
16524 | unsafefn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h; |
16525 | |
16526 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128" ] |
16527 | unsafefn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; |
16528 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256" ] |
16529 | unsafefn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; |
16530 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512" ] |
16531 | unsafefn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32; |
16532 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128" ] |
16533 | unsafefn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; |
16534 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256" ] |
16535 | unsafefn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; |
16536 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512" ] |
16537 | unsafefn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, rounding: i32) -> u16x32; |
16538 | |
16539 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128" ] |
16540 | unsafefn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; |
16541 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256" ] |
16542 | unsafefn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16; |
16543 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512" ] |
16544 | unsafefn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32; |
16545 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128" ] |
16546 | unsafefn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8; |
16547 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256" ] |
16548 | unsafefn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; |
16549 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512" ] |
16550 | unsafefn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; |
16551 | |
16552 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128" ] |
16553 | unsafefn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; |
16554 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256" ] |
16555 | unsafefn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; |
16556 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512" ] |
16557 | unsafefn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16; |
16558 | #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32" ] |
16559 | unsafefn vcvtsh2si32(a: __m128h, rounding: i32) -> i32; |
16560 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128" ] |
16561 | unsafefn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; |
16562 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256" ] |
16563 | unsafefn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; |
16564 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512" ] |
16565 | unsafefn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16; |
16566 | #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32" ] |
16567 | unsafefn vcvtsh2usi32(a: __m128h, sae: i32) -> u32; |
16568 | |
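// Truncating f16 -> 32-bit integer conversions, plus the truncating scalar low-element conversions.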
16569 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128" ] |
16570 | unsafefn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4; |
16571 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256" ] |
16572 | unsafefn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8; |
16573 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512" ] |
16574 | unsafefn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16; |
16575 | #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32" ] |
16576 | unsafefn vcvttsh2si32(a: __m128h, sae: i32) -> i32; |
16577 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128" ] |
16578 | unsafefn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4; |
16579 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256" ] |
16580 | unsafefn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8; |
16581 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512" ] |
16582 | unsafefn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16; |
16583 | #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32" ] |
16584 | unsafefn vcvttsh2usi32(a: __m128h, sae: i32) -> u32; |
16585 | |
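// Masked f16 -> signed/unsigned 64-bit integer conversions.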
16586 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128" ] |
16587 | unsafefn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; |
16588 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256" ] |
16589 | unsafefn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; |
16590 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512" ] |
16591 | unsafefn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
16592 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128" ] |
16593 | unsafefn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; |
16594 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256" ] |
16595 | unsafefn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; |
16596 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512" ] |
16597 | unsafefn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
16598 | |
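// Truncating f16 -> signed/unsigned 64-bit integer conversions.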
16599 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128" ] |
16600 | unsafefn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2; |
16601 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256" ] |
16602 | unsafefn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4; |
16603 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512" ] |
16604 | unsafefn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
16605 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128" ] |
16606 | unsafefn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2; |
16607 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256" ] |
16608 | unsafefn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4; |
16609 | #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512" ] |
16610 | unsafefn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
16611 | |
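// Masked f16 -> f32 conversions (vcvtph2psx) and the scalar f16 -> f32 conversion of the low element (vcvtsh2ss).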
16612 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128" ] |
16613 | unsafefn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128; |
16614 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256" ] |
16615 | unsafefn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256; |
16616 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512" ] |
16617 | unsafefn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512; |
16618 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round" ] |
16619 | unsafefn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128; |
16620 | |
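// Masked f16 -> f64 conversions (vcvtph2pd) and the scalar f16 -> f64 conversion of the low element (vcvtsh2sd).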
16621 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128" ] |
16622 | unsafefn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d; |
16623 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256" ] |
16624 | unsafefn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d; |
16625 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512" ] |
16626 | unsafefn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d; |
16627 | #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round" ] |
16628 | unsafefn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d; |
16629 | |
16630 | } |
16631 | |
16632 | #[cfg (test)] |
16633 | mod tests { |
16634 | use crate::core_arch::x86::*; |
16635 | use crate::mem::transmute; |
16636 | use crate::ptr::{addr_of, addr_of_mut}; |
16637 | use stdarch_test::simd_test; |
16638 | |
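// Test-local helpers that broadcast an interleaved (re, im) half-precision pair across a
// vector; they emulate the `_set1_pch` operations for the complex-arithmetic (`*_pch`) tests.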
16639 | #[target_feature (enable = "avx512fp16" )] |
16640 | unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h { |
16641 | _mm_setr_ph(re, im, re, im, re, im, re, im) |
16642 | } |
16643 | |
16644 | #[target_feature (enable = "avx512fp16" )] |
16645 | unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h { |
16646 | _mm256_setr_ph( |
16647 | re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, |
16648 | ) |
16649 | } |
16650 | |
16651 | #[target_feature (enable = "avx512fp16" )] |
16652 | unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h { |
16653 | _mm512_setr_ph( |
16654 | re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, |
16655 | re, im, re, im, re, im, re, im, re, im, |
16656 | ) |
16657 | } |
16658 | |
16659 | #[simd_test(enable = "avx512fp16" )] |
16660 | unsafe fn test_mm_set_ph() { |
16661 | let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
16662 | let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
16663 | assert_eq_m128h(r, e); |
16664 | } |
16665 | |
16666 | #[simd_test(enable = "avx512fp16" )] |
16667 | unsafe fn test_mm256_set_ph() { |
16668 | let r = _mm256_set_ph( |
16669 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16670 | ); |
16671 | let e = _mm256_setr_ph( |
16672 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
16673 | ); |
16674 | assert_eq_m256h(r, e); |
16675 | } |
16676 | |
16677 | #[simd_test(enable = "avx512fp16" )] |
16678 | unsafe fn test_mm512_set_ph() { |
16679 | let r = _mm512_set_ph( |
16680 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16681 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
16682 | 31.0, 32.0, |
16683 | ); |
16684 | let e = _mm512_setr_ph( |
16685 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
16686 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
16687 | 3.0, 2.0, 1.0, |
16688 | ); |
16689 | assert_eq_m512h(r, e); |
16690 | } |
16691 | |
16692 | #[simd_test(enable = "avx512fp16" )] |
16693 | unsafe fn test_mm_set_sh() { |
16694 | let r = _mm_set_sh(1.0); |
16695 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0); |
16696 | assert_eq_m128h(r, e); |
16697 | } |
16698 | |
16699 | #[simd_test(enable = "avx512fp16" )] |
16700 | unsafe fn test_mm_set1_ph() { |
16701 | let r = _mm_set1_ph(1.0); |
16702 | let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); |
16703 | assert_eq_m128h(r, e); |
16704 | } |
16705 | |
16706 | #[simd_test(enable = "avx512fp16" )] |
16707 | unsafe fn test_mm256_set1_ph() { |
16708 | let r = _mm256_set1_ph(1.0); |
16709 | let e = _mm256_set_ph( |
16710 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16711 | ); |
16712 | assert_eq_m256h(r, e); |
16713 | } |
16714 | |
16715 | #[simd_test(enable = "avx512fp16" )] |
16716 | unsafe fn test_mm512_set1_ph() { |
16717 | let r = _mm512_set1_ph(1.0); |
16718 | let e = _mm512_set_ph( |
16719 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16720 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
16721 | ); |
16722 | assert_eq_m512h(r, e); |
16723 | } |
16724 | |
16725 | #[simd_test(enable = "avx512fp16" )] |
16726 | unsafe fn test_mm_setr_ph() { |
16727 | let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
16728 | let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
16729 | assert_eq_m128h(r, e); |
16730 | } |
16731 | |
16732 | #[simd_test(enable = "avx512fp16" )] |
16733 | unsafe fn test_mm256_setr_ph() { |
16734 | let r = _mm256_setr_ph( |
16735 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16736 | ); |
16737 | let e = _mm256_set_ph( |
16738 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
16739 | ); |
16740 | assert_eq_m256h(r, e); |
16741 | } |
16742 | |
16743 | #[simd_test(enable = "avx512fp16" )] |
16744 | unsafe fn test_mm512_setr_ph() { |
16745 | let r = _mm512_setr_ph( |
16746 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
16747 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
16748 | 31.0, 32.0, |
16749 | ); |
16750 | let e = _mm512_set_ph( |
16751 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
16752 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
16753 | 3.0, 2.0, 1.0, |
16754 | ); |
16755 | assert_eq_m512h(r, e); |
16756 | } |
16757 | |
16758 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16759 | unsafe fn test_mm_setzero_ph() { |
16760 | let r = _mm_setzero_ph(); |
16761 | let e = _mm_set1_ph(0.0); |
16762 | assert_eq_m128h(r, e); |
16763 | } |
16764 | |
16765 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
16766 | unsafe fn test_mm256_setzero_ph() { |
16767 | let r = _mm256_setzero_ph(); |
16768 | let e = _mm256_set1_ph(0.0); |
16769 | assert_eq_m256h(r, e); |
16770 | } |
16771 | |
16772 | #[simd_test(enable = "avx512fp16" )] |
16773 | unsafe fn test_mm512_setzero_ph() { |
16774 | let r = _mm512_setzero_ph(); |
16775 | let e = _mm512_set1_ph(0.0); |
16776 | assert_eq_m512h(r, e); |
16777 | } |
16778 | |
16779 | #[simd_test(enable = "avx512fp16" )] |
16780 | unsafe fn test_mm_castsi128_ph() { |
16781 | let a = _mm_set1_epi16(0x3c00); |
16782 | let r = _mm_castsi128_ph(a); |
16783 | let e = _mm_set1_ph(1.0); |
16784 | assert_eq_m128h(r, e); |
16785 | } |
16786 | |
16787 | #[simd_test(enable = "avx512fp16" )] |
16788 | unsafe fn test_mm256_castsi256_ph() { |
16789 | let a = _mm256_set1_epi16(0x3c00); |
16790 | let r = _mm256_castsi256_ph(a); |
16791 | let e = _mm256_set1_ph(1.0); |
16792 | assert_eq_m256h(r, e); |
16793 | } |
16794 | |
16795 | #[simd_test(enable = "avx512fp16" )] |
16796 | unsafe fn test_mm512_castsi512_ph() { |
16797 | let a = _mm512_set1_epi16(0x3c00); |
16798 | let r = _mm512_castsi512_ph(a); |
16799 | let e = _mm512_set1_ph(1.0); |
16800 | assert_eq_m512h(r, e); |
16801 | } |
16802 | |
16803 | #[simd_test(enable = "avx512fp16" )] |
16804 | unsafe fn test_mm_castph_si128() { |
16805 | let a = _mm_set1_ph(1.0); |
16806 | let r = _mm_castph_si128(a); |
16807 | let e = _mm_set1_epi16(0x3c00); |
16808 | assert_eq_m128i(r, e); |
16809 | } |
16810 | |
16811 | #[simd_test(enable = "avx512fp16" )] |
16812 | unsafe fn test_mm256_castph_si256() { |
16813 | let a = _mm256_set1_ph(1.0); |
16814 | let r = _mm256_castph_si256(a); |
16815 | let e = _mm256_set1_epi16(0x3c00); |
16816 | assert_eq_m256i(r, e); |
16817 | } |
16818 | |
16819 | #[simd_test(enable = "avx512fp16" )] |
16820 | unsafe fn test_mm512_castph_si512() { |
16821 | let a = _mm512_set1_ph(1.0); |
16822 | let r = _mm512_castph_si512(a); |
16823 | let e = _mm512_set1_epi16(0x3c00); |
16824 | assert_eq_m512i(r, e); |
16825 | } |
16826 | |
16827 | #[simd_test(enable = "avx512fp16" )] |
16828 | unsafe fn test_mm_castps_ph() { |
16829 | let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00)); |
16830 | let r = _mm_castps_ph(a); |
16831 | let e = _mm_set1_ph(1.0); |
16832 | assert_eq_m128h(r, e); |
16833 | } |
16834 | |
16835 | #[simd_test(enable = "avx512fp16" )] |
16836 | unsafe fn test_mm256_castps_ph() { |
16837 | let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00)); |
16838 | let r = _mm256_castps_ph(a); |
16839 | let e = _mm256_set1_ph(1.0); |
16840 | assert_eq_m256h(r, e); |
16841 | } |
16842 | |
16843 | #[simd_test(enable = "avx512fp16" )] |
16844 | unsafe fn test_mm512_castps_ph() { |
16845 | let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00)); |
16846 | let r = _mm512_castps_ph(a); |
16847 | let e = _mm512_set1_ph(1.0); |
16848 | assert_eq_m512h(r, e); |
16849 | } |
16850 | |
16851 | #[simd_test(enable = "avx512fp16" )] |
16852 | unsafe fn test_mm_castph_ps() { |
16853 | let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000)); |
16854 | let r = _mm_castph_ps(a); |
16855 | let e = _mm_set1_ps(1.0); |
16856 | assert_eq_m128(r, e); |
16857 | } |
16858 | |
16859 | #[simd_test(enable = "avx512fp16" )] |
16860 | unsafe fn test_mm256_castph_ps() { |
16861 | let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000)); |
16862 | let r = _mm256_castph_ps(a); |
16863 | let e = _mm256_set1_ps(1.0); |
16864 | assert_eq_m256(r, e); |
16865 | } |
16866 | |
16867 | #[simd_test(enable = "avx512fp16" )] |
16868 | unsafe fn test_mm512_castph_ps() { |
16869 | let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000)); |
16870 | let r = _mm512_castph_ps(a); |
16871 | let e = _mm512_set1_ps(1.0); |
16872 | assert_eq_m512(r, e); |
16873 | } |
16874 | |
16875 | #[simd_test(enable = "avx512fp16" )] |
16876 | unsafe fn test_mm_castpd_ph() { |
16877 | let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00)); |
16878 | let r = _mm_castpd_ph(a); |
16879 | let e = _mm_set1_ph(1.0); |
16880 | assert_eq_m128h(r, e); |
16881 | } |
16882 | |
16883 | #[simd_test(enable = "avx512fp16" )] |
16884 | unsafe fn test_mm256_castpd_ph() { |
16885 | let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00)); |
16886 | let r = _mm256_castpd_ph(a); |
16887 | let e = _mm256_set1_ph(1.0); |
16888 | assert_eq_m256h(r, e); |
16889 | } |
16890 | |
16891 | #[simd_test(enable = "avx512fp16" )] |
16892 | unsafe fn test_mm512_castpd_ph() { |
16893 | let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00)); |
16894 | let r = _mm512_castpd_ph(a); |
16895 | let e = _mm512_set1_ph(1.0); |
16896 | assert_eq_m512h(r, e); |
16897 | } |
16898 | |
16899 | #[simd_test(enable = "avx512fp16" )] |
16900 | unsafe fn test_mm_castph_pd() { |
16901 | let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000)); |
16902 | let r = _mm_castph_pd(a); |
16903 | let e = _mm_set1_pd(1.0); |
16904 | assert_eq_m128d(r, e); |
16905 | } |
16906 | |
16907 | #[simd_test(enable = "avx512fp16" )] |
16908 | unsafe fn test_mm256_castph_pd() { |
16909 | let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000)); |
16910 | let r = _mm256_castph_pd(a); |
16911 | let e = _mm256_set1_pd(1.0); |
16912 | assert_eq_m256d(r, e); |
16913 | } |
16914 | |
16915 | #[simd_test(enable = "avx512fp16" )] |
16916 | unsafe fn test_mm512_castph_pd() { |
16917 | let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000)); |
16918 | let r = _mm512_castph_pd(a); |
16919 | let e = _mm512_set1_pd(1.0); |
16920 | assert_eq_m512d(r, e); |
16921 | } |
16922 | |
16923 | #[simd_test(enable = "avx512fp16" )] |
16924 | unsafe fn test_mm256_castph256_ph128() { |
16925 | let a = _mm256_setr_ph( |
16926 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16927 | ); |
16928 | let r = _mm256_castph256_ph128(a); |
16929 | let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16930 | assert_eq_m128h(r, e); |
16931 | } |
16932 | |
16933 | #[simd_test(enable = "avx512fp16" )] |
16934 | unsafe fn test_mm512_castph512_ph128() { |
16935 | let a = _mm512_setr_ph( |
16936 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., |
16937 | 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
16938 | ); |
16939 | let r = _mm512_castph512_ph128(a); |
16940 | let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16941 | assert_eq_m128h(r, e); |
16942 | } |
16943 | |
16944 | #[simd_test(enable = "avx512fp16" )] |
16945 | unsafe fn test_mm512_castph512_ph256() { |
16946 | let a = _mm512_setr_ph( |
16947 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., |
16948 | 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
16949 | ); |
16950 | let r = _mm512_castph512_ph256(a); |
16951 | let e = _mm256_setr_ph( |
16952 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16953 | ); |
16954 | assert_eq_m256h(r, e); |
16955 | } |
16956 | |
16957 | #[simd_test(enable = "avx512fp16" )] |
16958 | unsafe fn test_mm256_castph128_ph256() { |
16959 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16960 | let r = _mm256_castph128_ph256(a); |
16961 | assert_eq_m128h(_mm256_castph256_ph128(r), a); |
16962 | } |
16963 | |
16964 | #[simd_test(enable = "avx512fp16" )] |
16965 | unsafe fn test_mm512_castph128_ph512() { |
16966 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16967 | let r = _mm512_castph128_ph512(a); |
16968 | assert_eq_m128h(_mm512_castph512_ph128(r), a); |
16969 | } |
16970 | |
16971 | #[simd_test(enable = "avx512fp16" )] |
16972 | unsafe fn test_mm512_castph256_ph512() { |
16973 | let a = _mm256_setr_ph( |
16974 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
16975 | ); |
16976 | let r = _mm512_castph256_ph512(a); |
16977 | assert_eq_m256h(_mm512_castph512_ph256(r), a); |
16978 | } |
16979 | |
16980 | #[simd_test(enable = "avx512fp16" )] |
16981 | unsafe fn test_mm256_zextph128_ph256() { |
16982 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16983 | let r = _mm256_zextph128_ph256(a); |
16984 | let e = _mm256_setr_ph( |
16985 | 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., |
16986 | ); |
16987 | assert_eq_m256h(r, e); |
16988 | } |
16989 | |
16990 | #[simd_test(enable = "avx512fp16" )] |
16991 | unsafe fn test_mm512_zextph128_ph512() { |
16992 | let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.); |
16993 | let r = _mm512_zextph128_ph512(a); |
16994 | let e = _mm512_setr_ph( |
16995 | 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
16996 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
16997 | ); |
16998 | assert_eq_m512h(r, e); |
16999 | } |
17000 | |
17001 | #[simd_test(enable = "avx512fp16" )] |
17002 | unsafe fn test_mm512_zextph256_ph512() { |
17003 | let a = _mm256_setr_ph( |
17004 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
17005 | ); |
17006 | let r = _mm512_zextph256_ph512(a); |
17007 | let e = _mm512_setr_ph( |
17008 | 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0., |
17009 | 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
17010 | ); |
17011 | assert_eq_m512h(r, e); |
17012 | } |
17013 | |
17014 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17015 | unsafe fn test_mm_cmp_ph_mask() { |
17016 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17017 | let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); |
17018 | let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17019 | assert_eq!(r, 0b11110000); |
17020 | } |
17021 | |
17022 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17023 | unsafe fn test_mm_mask_cmp_ph_mask() { |
17024 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17025 | let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0); |
17026 | let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b); |
17027 | assert_eq!(r, 0b01010000); |
17028 | } |
17029 | |
17030 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17031 | unsafe fn test_mm256_cmp_ph_mask() { |
17032 | let a = _mm256_set_ph( |
17033 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17034 | ); |
17035 | let b = _mm256_set_ph( |
17036 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17037 | -16.0, |
17038 | ); |
17039 | let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17040 | assert_eq!(r, 0b1111000011110000); |
17041 | } |
17042 | |
17043 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17044 | unsafe fn test_mm256_mask_cmp_ph_mask() { |
17045 | let a = _mm256_set_ph( |
17046 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17047 | ); |
17048 | let b = _mm256_set_ph( |
17049 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17050 | -16.0, |
17051 | ); |
17052 | let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b); |
17053 | assert_eq!(r, 0b0101000001010000); |
17054 | } |
17055 | |
17056 | #[simd_test(enable = "avx512fp16" )] |
17057 | unsafe fn test_mm512_cmp_ph_mask() { |
17058 | let a = _mm512_set_ph( |
17059 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17060 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17061 | 31.0, 32.0, |
17062 | ); |
17063 | let b = _mm512_set_ph( |
17064 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17065 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17066 | -29.0, -30.0, -31.0, -32.0, |
17067 | ); |
17068 | let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b); |
17069 | assert_eq!(r, 0b11110000111100001111000011110000); |
17070 | } |
17071 | |
17072 | #[simd_test(enable = "avx512fp16" )] |
17073 | unsafe fn test_mm512_mask_cmp_ph_mask() { |
17074 | let a = _mm512_set_ph( |
17075 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17076 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17077 | 31.0, 32.0, |
17078 | ); |
17079 | let b = _mm512_set_ph( |
17080 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17081 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17082 | -29.0, -30.0, -31.0, -32.0, |
17083 | ); |
17084 | let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b); |
17085 | assert_eq!(r, 0b01010000010100000101000001010000); |
17086 | } |
17087 | |
17088 | #[simd_test(enable = "avx512fp16" )] |
17089 | unsafe fn test_mm512_cmp_round_ph_mask() { |
17090 | let a = _mm512_set_ph( |
17091 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17092 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17093 | 31.0, 32.0, |
17094 | ); |
17095 | let b = _mm512_set_ph( |
17096 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17097 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17098 | -29.0, -30.0, -31.0, -32.0, |
17099 | ); |
17100 | let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17101 | assert_eq!(r, 0b11110000111100001111000011110000); |
17102 | } |
17103 | |
17104 | #[simd_test(enable = "avx512fp16" )] |
17105 | unsafe fn test_mm512_mask_cmp_round_ph_mask() { |
17106 | let a = _mm512_set_ph( |
17107 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17108 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17109 | 31.0, 32.0, |
17110 | ); |
17111 | let b = _mm512_set_ph( |
17112 | 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, |
17113 | -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0, |
17114 | -29.0, -30.0, -31.0, -32.0, |
17115 | ); |
17116 | let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>( |
17117 | 0b01010101010101010101010101010101, |
17118 | a, |
17119 | b, |
17120 | ); |
17121 | assert_eq!(r, 0b01010000010100000101000001010000); |
17122 | } |
17123 | |
17124 | #[simd_test(enable = "avx512fp16" )] |
17125 | unsafe fn test_mm_cmp_round_sh_mask() { |
17126 | let a = _mm_set_sh(1.0); |
17127 | let b = _mm_set_sh(1.0); |
17128 | let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17129 | assert_eq!(r, 1); |
17130 | } |
17131 | |
17132 | #[simd_test(enable = "avx512fp16" )] |
17133 | unsafe fn test_mm_mask_cmp_round_sh_mask() { |
17134 | let a = _mm_set_sh(1.0); |
17135 | let b = _mm_set_sh(1.0); |
17136 | let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b); |
17137 | assert_eq!(r, 0); |
17138 | } |
17139 | |
17140 | #[simd_test(enable = "avx512fp16" )] |
17141 | unsafe fn test_mm_cmp_sh_mask() { |
17142 | let a = _mm_set_sh(1.0); |
17143 | let b = _mm_set_sh(1.0); |
17144 | let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b); |
17145 | assert_eq!(r, 1); |
17146 | } |
17147 | |
17148 | #[simd_test(enable = "avx512fp16" )] |
17149 | unsafe fn test_mm_mask_cmp_sh_mask() { |
17150 | let a = _mm_set_sh(1.0); |
17151 | let b = _mm_set_sh(1.0); |
17152 | let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b); |
17153 | assert_eq!(r, 0); |
17154 | } |
17155 | |
17156 | #[simd_test(enable = "avx512fp16" )] |
17157 | unsafe fn test_mm_comi_round_sh() { |
17158 | let a = _mm_set_sh(1.0); |
17159 | let b = _mm_set_sh(1.0); |
17160 | let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b); |
17161 | assert_eq!(r, 1); |
17162 | } |
17163 | |
17164 | #[simd_test(enable = "avx512fp16" )] |
17165 | unsafe fn test_mm_comi_sh() { |
17166 | let a = _mm_set_sh(1.0); |
17167 | let b = _mm_set_sh(1.0); |
17168 | let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b); |
17169 | assert_eq!(r, 1); |
17170 | } |
17171 | |
17172 | #[simd_test(enable = "avx512fp16" )] |
17173 | unsafe fn test_mm_comieq_sh() { |
17174 | let a = _mm_set_sh(1.0); |
17175 | let b = _mm_set_sh(1.0); |
17176 | let r = _mm_comieq_sh(a, b); |
17177 | assert_eq!(r, 1); |
17178 | } |
17179 | |
17180 | #[simd_test(enable = "avx512fp16" )] |
17181 | unsafe fn test_mm_comige_sh() { |
17182 | let a = _mm_set_sh(2.0); |
17183 | let b = _mm_set_sh(1.0); |
17184 | let r = _mm_comige_sh(a, b); |
17185 | assert_eq!(r, 1); |
17186 | } |
17187 | |
17188 | #[simd_test(enable = "avx512fp16" )] |
17189 | unsafe fn test_mm_comigt_sh() { |
17190 | let a = _mm_set_sh(2.0); |
17191 | let b = _mm_set_sh(1.0); |
17192 | let r = _mm_comigt_sh(a, b); |
17193 | assert_eq!(r, 1); |
17194 | } |
17195 | |
17196 | #[simd_test(enable = "avx512fp16" )] |
17197 | unsafe fn test_mm_comile_sh() { |
17198 | let a = _mm_set_sh(1.0); |
17199 | let b = _mm_set_sh(2.0); |
17200 | let r = _mm_comile_sh(a, b); |
17201 | assert_eq!(r, 1); |
17202 | } |
17203 | |
17204 | #[simd_test(enable = "avx512fp16" )] |
17205 | unsafe fn test_mm_comilt_sh() { |
17206 | let a = _mm_set_sh(1.0); |
17207 | let b = _mm_set_sh(2.0); |
17208 | let r = _mm_comilt_sh(a, b); |
17209 | assert_eq!(r, 1); |
17210 | } |
17211 | |
17212 | #[simd_test(enable = "avx512fp16" )] |
17213 | unsafe fn test_mm_comineq_sh() { |
17214 | let a = _mm_set_sh(1.0); |
17215 | let b = _mm_set_sh(2.0); |
17216 | let r = _mm_comineq_sh(a, b); |
17217 | assert_eq!(r, 1); |
17218 | } |
17219 | |
17220 | #[simd_test(enable = "avx512fp16" )] |
17221 | unsafe fn test_mm_ucomieq_sh() { |
17222 | let a = _mm_set_sh(1.0); |
17223 | let b = _mm_set_sh(1.0); |
17224 | let r = _mm_ucomieq_sh(a, b); |
17225 | assert_eq!(r, 1); |
17226 | } |
17227 | |
17228 | #[simd_test(enable = "avx512fp16" )] |
17229 | unsafe fn test_mm_ucomige_sh() { |
17230 | let a = _mm_set_sh(2.0); |
17231 | let b = _mm_set_sh(1.0); |
17232 | let r = _mm_ucomige_sh(a, b); |
17233 | assert_eq!(r, 1); |
17234 | } |
17235 | |
17236 | #[simd_test(enable = "avx512fp16" )] |
17237 | unsafe fn test_mm_ucomigt_sh() { |
17238 | let a = _mm_set_sh(2.0); |
17239 | let b = _mm_set_sh(1.0); |
17240 | let r = _mm_ucomigt_sh(a, b); |
17241 | assert_eq!(r, 1); |
17242 | } |
17243 | |
17244 | #[simd_test(enable = "avx512fp16" )] |
17245 | unsafe fn test_mm_ucomile_sh() { |
17246 | let a = _mm_set_sh(1.0); |
17247 | let b = _mm_set_sh(2.0); |
17248 | let r = _mm_ucomile_sh(a, b); |
17249 | assert_eq!(r, 1); |
17250 | } |
17251 | |
17252 | #[simd_test(enable = "avx512fp16" )] |
17253 | unsafe fn test_mm_ucomilt_sh() { |
17254 | let a = _mm_set_sh(1.0); |
17255 | let b = _mm_set_sh(2.0); |
17256 | let r = _mm_ucomilt_sh(a, b); |
17257 | assert_eq!(r, 1); |
17258 | } |
17259 | |
17260 | #[simd_test(enable = "avx512fp16" )] |
17261 | unsafe fn test_mm_ucomineq_sh() { |
17262 | let a = _mm_set_sh(1.0); |
17263 | let b = _mm_set_sh(2.0); |
17264 | let r = _mm_ucomineq_sh(a, b); |
17265 | assert_eq!(r, 1); |
17266 | } |
17267 | |
17268 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17269 | unsafe fn test_mm_load_ph() { |
17270 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17271 | let b = _mm_load_ph(addr_of!(a).cast()); |
17272 | assert_eq_m128h(a, b); |
17273 | } |
17274 | |
17275 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17276 | unsafe fn test_mm256_load_ph() { |
17277 | let a = _mm256_set_ph( |
17278 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17279 | ); |
17280 | let b = _mm256_load_ph(addr_of!(a).cast()); |
17281 | assert_eq_m256h(a, b); |
17282 | } |
17283 | |
17284 | #[simd_test(enable = "avx512fp16" )] |
17285 | unsafe fn test_mm512_load_ph() { |
17286 | let a = _mm512_set_ph( |
17287 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17288 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17289 | 31.0, 32.0, |
17290 | ); |
17291 | let b = _mm512_load_ph(addr_of!(a).cast()); |
17292 | assert_eq_m512h(a, b); |
17293 | } |
17294 | |
17295 | #[simd_test(enable = "avx512fp16" )] |
17296 | unsafe fn test_mm_load_sh() { |
17297 | let a = _mm_set_sh(1.0); |
17298 | let b = _mm_load_sh(addr_of!(a).cast()); |
17299 | assert_eq_m128h(a, b); |
17300 | } |
17301 | |
17302 | #[simd_test(enable = "avx512fp16" )] |
17303 | unsafe fn test_mm_mask_load_sh() { |
17304 | let a = _mm_set_sh(1.0); |
17305 | let src = _mm_set_sh(2.); |
17306 | let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast()); |
17307 | assert_eq_m128h(a, b); |
17308 | let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast()); |
17309 | assert_eq_m128h(src, b); |
17310 | } |
17311 | |
17312 | #[simd_test(enable = "avx512fp16" )] |
17313 | unsafe fn test_mm_maskz_load_sh() { |
17314 | let a = _mm_set_sh(1.0); |
17315 | let b = _mm_maskz_load_sh(1, addr_of!(a).cast()); |
17316 | assert_eq_m128h(a, b); |
17317 | let b = _mm_maskz_load_sh(0, addr_of!(a).cast()); |
17318 | assert_eq_m128h(_mm_setzero_ph(), b); |
17319 | } |
17320 | |
17321 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17322 | unsafe fn test_mm_loadu_ph() { |
17323 | let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; |
17324 | let r = _mm_loadu_ph(array.as_ptr()); |
17325 | let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17326 | assert_eq_m128h(r, e); |
17327 | } |
17328 | |
17329 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17330 | unsafe fn test_mm256_loadu_ph() { |
17331 | let array = [ |
17332 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17333 | ]; |
17334 | let r = _mm256_loadu_ph(array.as_ptr()); |
17335 | let e = _mm256_setr_ph( |
17336 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17337 | ); |
17338 | assert_eq_m256h(r, e); |
17339 | } |
17340 | |
17341 | #[simd_test(enable = "avx512fp16" )] |
17342 | unsafe fn test_mm512_loadu_ph() { |
17343 | let array = [ |
17344 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17345 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17346 | 31.0, 32.0, |
17347 | ]; |
17348 | let r = _mm512_loadu_ph(array.as_ptr()); |
17349 | let e = _mm512_setr_ph( |
17350 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17351 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17352 | 31.0, 32.0, |
17353 | ); |
17354 | assert_eq_m512h(r, e); |
17355 | } |
17356 | |
17357 | #[simd_test(enable = "avx512fp16" )] |
17358 | unsafe fn test_mm_move_sh() { |
17359 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17360 | let b = _mm_set_sh(9.0); |
17361 | let r = _mm_move_sh(a, b); |
17362 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0); |
17363 | assert_eq_m128h(r, e); |
17364 | } |
17365 | |
17366 | #[simd_test(enable = "avx512fp16" )] |
17367 | unsafe fn test_mm_mask_move_sh() { |
17368 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17369 | let b = _mm_set_sh(9.0); |
17370 | let src = _mm_set_sh(10.0); |
17371 | let r = _mm_mask_move_sh(src, 0, a, b); |
17372 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0); |
17373 | assert_eq_m128h(r, e); |
17374 | } |
17375 | |
17376 | #[simd_test(enable = "avx512fp16" )] |
17377 | unsafe fn test_mm_maskz_move_sh() { |
17378 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17379 | let b = _mm_set_sh(9.0); |
17380 | let r = _mm_maskz_move_sh(0, a, b); |
17381 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0); |
17382 | assert_eq_m128h(r, e); |
17383 | } |
17384 | |
17385 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17386 | unsafe fn test_mm_store_ph() { |
17387 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17388 | let mut b = _mm_setzero_ph(); |
17389 | _mm_store_ph(addr_of_mut!(b).cast(), a); |
17390 | assert_eq_m128h(a, b); |
17391 | } |
17392 | |
17393 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17394 | unsafe fn test_mm256_store_ph() { |
17395 | let a = _mm256_set_ph( |
17396 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17397 | ); |
17398 | let mut b = _mm256_setzero_ph(); |
17399 | _mm256_store_ph(addr_of_mut!(b).cast(), a); |
17400 | assert_eq_m256h(a, b); |
17401 | } |
17402 | |
17403 | #[simd_test(enable = "avx512fp16" )] |
17404 | unsafe fn test_mm512_store_ph() { |
17405 | let a = _mm512_set_ph( |
17406 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17407 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17408 | 31.0, 32.0, |
17409 | ); |
17410 | let mut b = _mm512_setzero_ph(); |
17411 | _mm512_store_ph(addr_of_mut!(b).cast(), a); |
17412 | assert_eq_m512h(a, b); |
17413 | } |
17414 | |
17415 | #[simd_test(enable = "avx512fp16" )] |
17416 | unsafe fn test_mm_store_sh() { |
17417 | let a = _mm_set_sh(1.0); |
17418 | let mut b = _mm_setzero_ph(); |
17419 | _mm_store_sh(addr_of_mut!(b).cast(), a); |
17420 | assert_eq_m128h(a, b); |
17421 | } |
17422 | |
17423 | #[simd_test(enable = "avx512fp16" )] |
17424 | unsafe fn test_mm_mask_store_sh() { |
17425 | let a = _mm_set_sh(1.0); |
17426 | let mut b = _mm_setzero_ph(); |
17427 | _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a); |
17428 | assert_eq_m128h(_mm_setzero_ph(), b); |
17429 | _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a); |
17430 | assert_eq_m128h(a, b); |
17431 | } |
17432 | |
17433 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17434 | unsafe fn test_mm_storeu_ph() { |
17435 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17436 | let mut array = [0.0; 8]; |
17437 | _mm_storeu_ph(array.as_mut_ptr(), a); |
17438 | assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr())); |
17439 | } |
17440 | |
17441 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17442 | unsafe fn test_mm256_storeu_ph() { |
17443 | let a = _mm256_set_ph( |
17444 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17445 | ); |
17446 | let mut array = [0.0; 16]; |
17447 | _mm256_storeu_ph(array.as_mut_ptr(), a); |
17448 | assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr())); |
17449 | } |
17450 | |
17451 | #[simd_test(enable = "avx512fp16" )] |
17452 | unsafe fn test_mm512_storeu_ph() { |
17453 | let a = _mm512_set_ph( |
17454 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17455 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17456 | 31.0, 32.0, |
17457 | ); |
17458 | let mut array = [0.0; 32]; |
17459 | _mm512_storeu_ph(array.as_mut_ptr(), a); |
17460 | assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr())); |
17461 | } |
17462 | |
17463 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17464 | unsafe fn test_mm_add_ph() { |
17465 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17466 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17467 | let r = _mm_add_ph(a, b); |
17468 | let e = _mm_set1_ph(9.0); |
17469 | assert_eq_m128h(r, e); |
17470 | } |
17471 | |
17472 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17473 | unsafe fn test_mm_mask_add_ph() { |
17474 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17475 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17476 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
17477 | let r = _mm_mask_add_ph(src, 0b01010101, a, b); |
17478 | let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.); |
17479 | assert_eq_m128h(r, e); |
17480 | } |
17481 | |
17482 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17483 | unsafe fn test_mm_maskz_add_ph() { |
17484 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17485 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17486 | let r = _mm_maskz_add_ph(0b01010101, a, b); |
17487 | let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.); |
17488 | assert_eq_m128h(r, e); |
17489 | } |
17490 | |
17491 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17492 | unsafe fn test_mm256_add_ph() { |
17493 | let a = _mm256_set_ph( |
17494 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17495 | ); |
17496 | let b = _mm256_set_ph( |
17497 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17498 | ); |
17499 | let r = _mm256_add_ph(a, b); |
17500 | let e = _mm256_set1_ph(17.0); |
17501 | assert_eq_m256h(r, e); |
17502 | } |
17503 | |
17504 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17505 | unsafe fn test_mm256_mask_add_ph() { |
17506 | let a = _mm256_set_ph( |
17507 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17508 | ); |
17509 | let b = _mm256_set_ph( |
17510 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17511 | ); |
17512 | let src = _mm256_set_ph( |
17513 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
17514 | ); |
17515 | let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b); |
17516 | let e = _mm256_set_ph( |
17517 | 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17., |
17518 | ); |
17519 | assert_eq_m256h(r, e); |
17520 | } |
17521 | |
17522 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17523 | unsafe fn test_mm256_maskz_add_ph() { |
17524 | let a = _mm256_set_ph( |
17525 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17526 | ); |
17527 | let b = _mm256_set_ph( |
17528 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17529 | ); |
17530 | let r = _mm256_maskz_add_ph(0b0101010101010101, a, b); |
17531 | let e = _mm256_set_ph( |
17532 | 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., |
17533 | ); |
17534 | assert_eq_m256h(r, e); |
17535 | } |
17536 | |
17537 | #[simd_test(enable = "avx512fp16" )] |
17538 | unsafe fn test_mm512_add_ph() { |
17539 | let a = _mm512_set_ph( |
17540 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17541 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17542 | 31.0, 32.0, |
17543 | ); |
17544 | let b = _mm512_set_ph( |
17545 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17546 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17547 | 3.0, 2.0, 1.0, |
17548 | ); |
17549 | let r = _mm512_add_ph(a, b); |
17550 | let e = _mm512_set1_ph(33.0); |
17551 | assert_eq_m512h(r, e); |
17552 | } |
17553 | |
17554 | #[simd_test(enable = "avx512fp16" )] |
17555 | unsafe fn test_mm512_mask_add_ph() { |
17556 | let a = _mm512_set_ph( |
17557 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17558 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17559 | 31.0, 32.0, |
17560 | ); |
17561 | let b = _mm512_set_ph( |
17562 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17563 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17564 | 3.0, 2.0, 1.0, |
17565 | ); |
17566 | let src = _mm512_set_ph( |
17567 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17568 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17569 | ); |
17570 | let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b); |
17571 | let e = _mm512_set_ph( |
17572 | 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., |
17573 | 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., |
17574 | ); |
17575 | assert_eq_m512h(r, e); |
17576 | } |
17577 | |
17578 | #[simd_test(enable = "avx512fp16" )] |
17579 | unsafe fn test_mm512_maskz_add_ph() { |
17580 | let a = _mm512_set_ph( |
17581 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17582 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17583 | 31.0, 32.0, |
17584 | ); |
17585 | let b = _mm512_set_ph( |
17586 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17587 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17588 | 3.0, 2.0, 1.0, |
17589 | ); |
17590 | let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b); |
17591 | let e = _mm512_set_ph( |
17592 | 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., |
17593 | 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., |
17594 | ); |
17595 | assert_eq_m512h(r, e); |
17596 | } |
17597 | |
17598 | #[simd_test(enable = "avx512fp16" )] |
17599 | unsafe fn test_mm512_add_round_ph() { |
17600 | let a = _mm512_set_ph( |
17601 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17602 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17603 | 31.0, 32.0, |
17604 | ); |
17605 | let b = _mm512_set_ph( |
17606 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17607 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17608 | 3.0, 2.0, 1.0, |
17609 | ); |
17610 | let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17611 | let e = _mm512_set1_ph(33.0); |
17612 | assert_eq_m512h(r, e); |
17613 | } |
17614 | |
17615 | #[simd_test(enable = "avx512fp16" )] |
17616 | unsafe fn test_mm512_mask_add_round_ph() { |
17617 | let a = _mm512_set_ph( |
17618 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17619 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17620 | 31.0, 32.0, |
17621 | ); |
17622 | let b = _mm512_set_ph( |
17623 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17624 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17625 | 3.0, 2.0, 1.0, |
17626 | ); |
17627 | let src = _mm512_set_ph( |
17628 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17629 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17630 | ); |
17631 | let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17632 | src, |
17633 | 0b01010101010101010101010101010101, |
17634 | a, |
17635 | b, |
17636 | ); |
17637 | let e = _mm512_set_ph( |
17638 | 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50., |
17639 | 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33., |
17640 | ); |
17641 | assert_eq_m512h(r, e); |
17642 | } |
17643 | |
17644 | #[simd_test(enable = "avx512fp16" )] |
17645 | unsafe fn test_mm512_maskz_add_round_ph() { |
17646 | let a = _mm512_set_ph( |
17647 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17648 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17649 | 31.0, 32.0, |
17650 | ); |
17651 | let b = _mm512_set_ph( |
17652 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17653 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17654 | 3.0, 2.0, 1.0, |
17655 | ); |
17656 | let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17657 | 0b01010101010101010101010101010101, |
17658 | a, |
17659 | b, |
17660 | ); |
17661 | let e = _mm512_set_ph( |
17662 | 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., |
17663 | 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., |
17664 | ); |
17665 | assert_eq_m512h(r, e); |
17666 | } |
17667 | |
17668 | #[simd_test(enable = "avx512fp16" )] |
17669 | unsafe fn test_mm_add_round_sh() { |
17670 | let a = _mm_set_sh(1.0); |
17671 | let b = _mm_set_sh(2.0); |
17672 | let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17673 | let e = _mm_set_sh(3.0); |
17674 | assert_eq_m128h(r, e); |
17675 | } |
17676 | |
17677 | #[simd_test(enable = "avx512fp16" )] |
17678 | unsafe fn test_mm_mask_add_round_sh() { |
17679 | let a = _mm_set_sh(1.0); |
17680 | let b = _mm_set_sh(2.0); |
17681 | let src = _mm_set_sh(4.0); |
17682 | let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17683 | src, 0, a, b, |
17684 | ); |
17685 | let e = _mm_set_sh(4.0); |
17686 | assert_eq_m128h(r, e); |
17687 | let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17688 | src, 1, a, b, |
17689 | ); |
17690 | let e = _mm_set_sh(3.0); |
17691 | assert_eq_m128h(r, e); |
17692 | } |
17693 | |
17694 | #[simd_test(enable = "avx512fp16" )] |
17695 | unsafe fn test_mm_maskz_add_round_sh() { |
17696 | let a = _mm_set_sh(1.0); |
17697 | let b = _mm_set_sh(2.0); |
17698 | let r = |
17699 | _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
17700 | let e = _mm_set_sh(0.0); |
17701 | assert_eq_m128h(r, e); |
17702 | let r = |
17703 | _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
17704 | let e = _mm_set_sh(3.0); |
17705 | assert_eq_m128h(r, e); |
17706 | } |
17707 | |
17708 | #[simd_test(enable = "avx512fp16" )] |
17709 | unsafe fn test_mm_add_sh() { |
17710 | let a = _mm_set_sh(1.0); |
17711 | let b = _mm_set_sh(2.0); |
17712 | let r = _mm_add_sh(a, b); |
17713 | let e = _mm_set_sh(3.0); |
17714 | assert_eq_m128h(r, e); |
17715 | } |
17716 | |
17717 | #[simd_test(enable = "avx512fp16" )] |
17718 | unsafe fn test_mm_mask_add_sh() { |
17719 | let a = _mm_set_sh(1.0); |
17720 | let b = _mm_set_sh(2.0); |
17721 | let src = _mm_set_sh(4.0); |
17722 | let r = _mm_mask_add_sh(src, 0, a, b); |
17723 | let e = _mm_set_sh(4.0); |
17724 | assert_eq_m128h(r, e); |
17725 | let r = _mm_mask_add_sh(src, 1, a, b); |
17726 | let e = _mm_set_sh(3.0); |
17727 | assert_eq_m128h(r, e); |
17728 | } |
17729 | |
17730 | #[simd_test(enable = "avx512fp16" )] |
17731 | unsafe fn test_mm_maskz_add_sh() { |
17732 | let a = _mm_set_sh(1.0); |
17733 | let b = _mm_set_sh(2.0); |
17734 | let r = _mm_maskz_add_sh(0, a, b); |
17735 | let e = _mm_set_sh(0.0); |
17736 | assert_eq_m128h(r, e); |
17737 | let r = _mm_maskz_add_sh(1, a, b); |
17738 | let e = _mm_set_sh(3.0); |
17739 | assert_eq_m128h(r, e); |
17740 | } |
17741 | |
17742 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17743 | unsafe fn test_mm_sub_ph() { |
17744 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17745 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17746 | let r = _mm_sub_ph(a, b); |
17747 | let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0); |
17748 | assert_eq_m128h(r, e); |
17749 | } |
17750 | |
17751 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17752 | unsafe fn test_mm_mask_sub_ph() { |
17753 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17754 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17755 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
17756 | let r = _mm_mask_sub_ph(src, 0b01010101, a, b); |
17757 | let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.); |
17758 | assert_eq_m128h(r, e); |
17759 | } |
17760 | |
17761 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17762 | unsafe fn test_mm_maskz_sub_ph() { |
17763 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
17764 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
17765 | let r = _mm_maskz_sub_ph(0b01010101, a, b); |
17766 | let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.); |
17767 | assert_eq_m128h(r, e); |
17768 | } |
17769 | |
17770 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17771 | unsafe fn test_mm256_sub_ph() { |
17772 | let a = _mm256_set_ph( |
17773 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17774 | ); |
17775 | let b = _mm256_set_ph( |
17776 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17777 | ); |
17778 | let r = _mm256_sub_ph(a, b); |
17779 | let e = _mm256_set_ph( |
17780 | -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, |
17781 | 15.0, |
17782 | ); |
17783 | assert_eq_m256h(r, e); |
17784 | } |
17785 | |
17786 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17787 | unsafe fn test_mm256_mask_sub_ph() { |
17788 | let a = _mm256_set_ph( |
17789 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17790 | ); |
17791 | let b = _mm256_set_ph( |
17792 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17793 | ); |
17794 | let src = _mm256_set_ph( |
17795 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
17796 | ); |
17797 | let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b); |
17798 | let e = _mm256_set_ph( |
17799 | 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15., |
17800 | ); |
17801 | assert_eq_m256h(r, e); |
17802 | } |
17803 | |
17804 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
17805 | unsafe fn test_mm256_maskz_sub_ph() { |
17806 | let a = _mm256_set_ph( |
17807 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17808 | ); |
17809 | let b = _mm256_set_ph( |
17810 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
17811 | ); |
17812 | let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b); |
17813 | let e = _mm256_set_ph( |
17814 | 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15., |
17815 | ); |
17816 | assert_eq_m256h(r, e); |
17817 | } |
17818 | |
17819 | #[simd_test(enable = "avx512fp16" )] |
17820 | unsafe fn test_mm512_sub_ph() { |
17821 | let a = _mm512_set_ph( |
17822 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17823 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17824 | 31.0, 32.0, |
17825 | ); |
17826 | let b = _mm512_set_ph( |
17827 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17828 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17829 | 3.0, 2.0, 1.0, |
17830 | ); |
17831 | let r = _mm512_sub_ph(a, b); |
17832 | let e = _mm512_set_ph( |
17833 | -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, |
17834 | -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, |
17835 | 23.0, 25.0, 27.0, 29.0, 31.0, |
17836 | ); |
17837 | assert_eq_m512h(r, e); |
17838 | } |
17839 | |
17840 | #[simd_test(enable = "avx512fp16" )] |
17841 | unsafe fn test_mm512_mask_sub_ph() { |
17842 | let a = _mm512_set_ph( |
17843 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17844 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17845 | 31.0, 32.0, |
17846 | ); |
17847 | let b = _mm512_set_ph( |
17848 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17849 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17850 | 3.0, 2.0, 1.0, |
17851 | ); |
17852 | let src = _mm512_set_ph( |
17853 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17854 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17855 | ); |
17856 | let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b); |
17857 | let e = _mm512_set_ph( |
17858 | 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., |
17859 | 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., |
17860 | ); |
17861 | assert_eq_m512h(r, e); |
17862 | } |
17863 | |
17864 | #[simd_test(enable = "avx512fp16" )] |
17865 | unsafe fn test_mm512_maskz_sub_ph() { |
17866 | let a = _mm512_set_ph( |
17867 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17868 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17869 | 31.0, 32.0, |
17870 | ); |
17871 | let b = _mm512_set_ph( |
17872 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17873 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17874 | 3.0, 2.0, 1.0, |
17875 | ); |
17876 | let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b); |
17877 | let e = _mm512_set_ph( |
17878 | 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., |
17879 | 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., |
17880 | ); |
17881 | assert_eq_m512h(r, e); |
17882 | } |
17883 | |
17884 | #[simd_test(enable = "avx512fp16" )] |
17885 | unsafe fn test_mm512_sub_round_ph() { |
17886 | let a = _mm512_set_ph( |
17887 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17888 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17889 | 31.0, 32.0, |
17890 | ); |
17891 | let b = _mm512_set_ph( |
17892 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17893 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17894 | 3.0, 2.0, 1.0, |
17895 | ); |
17896 | let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17897 | let e = _mm512_set_ph( |
17898 | -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0, |
17899 | -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, |
17900 | 23.0, 25.0, 27.0, 29.0, 31.0, |
17901 | ); |
17902 | assert_eq_m512h(r, e); |
17903 | } |
17904 | |
17905 | #[simd_test(enable = "avx512fp16" )] |
17906 | unsafe fn test_mm512_mask_sub_round_ph() { |
17907 | let a = _mm512_set_ph( |
17908 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17909 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17910 | 31.0, 32.0, |
17911 | ); |
17912 | let b = _mm512_set_ph( |
17913 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17914 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17915 | 3.0, 2.0, 1.0, |
17916 | ); |
17917 | let src = _mm512_set_ph( |
17918 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
17919 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
17920 | ); |
17921 | let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17922 | src, |
17923 | 0b01010101010101010101010101010101, |
17924 | a, |
17925 | b, |
17926 | ); |
17927 | let e = _mm512_set_ph( |
17928 | 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1., |
17929 | 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31., |
17930 | ); |
17931 | assert_eq_m512h(r, e); |
17932 | } |
17933 | |
17934 | #[simd_test(enable = "avx512fp16" )] |
17935 | unsafe fn test_mm512_maskz_sub_round_ph() { |
17936 | let a = _mm512_set_ph( |
17937 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
17938 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
17939 | 31.0, 32.0, |
17940 | ); |
17941 | let b = _mm512_set_ph( |
17942 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
17943 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
17944 | 3.0, 2.0, 1.0, |
17945 | ); |
17946 | let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17947 | 0b01010101010101010101010101010101, |
17948 | a, |
17949 | b, |
17950 | ); |
17951 | let e = _mm512_set_ph( |
17952 | 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., |
17953 | 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31., |
17954 | ); |
17955 | assert_eq_m512h(r, e); |
17956 | } |
17957 | |
17958 | #[simd_test(enable = "avx512fp16" )] |
17959 | unsafe fn test_mm_sub_round_sh() { |
17960 | let a = _mm_set_sh(1.0); |
17961 | let b = _mm_set_sh(2.0); |
17962 | let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
17963 | let e = _mm_set_sh(-1.0); |
17964 | assert_eq_m128h(r, e); |
17965 | } |
17966 | |
17967 | #[simd_test(enable = "avx512fp16" )] |
17968 | unsafe fn test_mm_mask_sub_round_sh() { |
17969 | let a = _mm_set_sh(1.0); |
17970 | let b = _mm_set_sh(2.0); |
17971 | let src = _mm_set_sh(4.0); |
17972 | let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17973 | src, 0, a, b, |
17974 | ); |
17975 | let e = _mm_set_sh(4.0); |
17976 | assert_eq_m128h(r, e); |
17977 | let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
17978 | src, 1, a, b, |
17979 | ); |
17980 | let e = _mm_set_sh(-1.0); |
17981 | assert_eq_m128h(r, e); |
17982 | } |
17983 | |
17984 | #[simd_test(enable = "avx512fp16" )] |
17985 | unsafe fn test_mm_maskz_sub_round_sh() { |
17986 | let a = _mm_set_sh(1.0); |
17987 | let b = _mm_set_sh(2.0); |
17988 | let r = |
17989 | _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
17990 | let e = _mm_set_sh(0.0); |
17991 | assert_eq_m128h(r, e); |
17992 | let r = |
17993 | _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
17994 | let e = _mm_set_sh(-1.0); |
17995 | assert_eq_m128h(r, e); |
17996 | } |
17997 | |
17998 | #[simd_test(enable = "avx512fp16" )] |
17999 | unsafe fn test_mm_sub_sh() { |
18000 | let a = _mm_set_sh(1.0); |
18001 | let b = _mm_set_sh(2.0); |
18002 | let r = _mm_sub_sh(a, b); |
18003 | let e = _mm_set_sh(-1.0); |
18004 | assert_eq_m128h(r, e); |
18005 | } |
18006 | |
18007 | #[simd_test(enable = "avx512fp16" )] |
18008 | unsafe fn test_mm_mask_sub_sh() { |
18009 | let a = _mm_set_sh(1.0); |
18010 | let b = _mm_set_sh(2.0); |
18011 | let src = _mm_set_sh(4.0); |
18012 | let r = _mm_mask_sub_sh(src, 0, a, b); |
18013 | let e = _mm_set_sh(4.0); |
18014 | assert_eq_m128h(r, e); |
18015 | let r = _mm_mask_sub_sh(src, 1, a, b); |
18016 | let e = _mm_set_sh(-1.0); |
18017 | assert_eq_m128h(r, e); |
18018 | } |
18019 | |
18020 | #[simd_test(enable = "avx512fp16" )] |
18021 | unsafe fn test_mm_maskz_sub_sh() { |
18022 | let a = _mm_set_sh(1.0); |
18023 | let b = _mm_set_sh(2.0); |
18024 | let r = _mm_maskz_sub_sh(0, a, b); |
18025 | let e = _mm_set_sh(0.0); |
18026 | assert_eq_m128h(r, e); |
18027 | let r = _mm_maskz_sub_sh(1, a, b); |
18028 | let e = _mm_set_sh(-1.0); |
18029 | assert_eq_m128h(r, e); |
18030 | } |
18031 | |
18032 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18033 | unsafe fn test_mm_mul_ph() { |
18034 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18035 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18036 | let r = _mm_mul_ph(a, b); |
18037 | let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0); |
18038 | assert_eq_m128h(r, e); |
18039 | } |
18040 | |
18041 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18042 | unsafe fn test_mm_mask_mul_ph() { |
18043 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18044 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18045 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
18046 | let r = _mm_mask_mul_ph(src, 0b01010101, a, b); |
18047 | let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.); |
18048 | assert_eq_m128h(r, e); |
18049 | } |
18050 | |
18051 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18052 | unsafe fn test_mm_maskz_mul_ph() { |
18053 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
18054 | let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0); |
18055 | let r = _mm_maskz_mul_ph(0b01010101, a, b); |
18056 | let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.); |
18057 | assert_eq_m128h(r, e); |
18058 | } |
18059 | |
18060 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18061 | unsafe fn test_mm256_mul_ph() { |
18062 | let a = _mm256_set_ph( |
18063 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18064 | ); |
18065 | let b = _mm256_set_ph( |
18066 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18067 | ); |
18068 | let r = _mm256_mul_ph(a, b); |
18069 | let e = _mm256_set_ph( |
18070 | 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0, |
18071 | 30.0, 16.0, |
18072 | ); |
18073 | assert_eq_m256h(r, e); |
18074 | } |
18075 | |
18076 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18077 | unsafe fn test_mm256_mask_mul_ph() { |
18078 | let a = _mm256_set_ph( |
18079 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18080 | ); |
18081 | let b = _mm256_set_ph( |
18082 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18083 | ); |
18084 | let src = _mm256_set_ph( |
18085 | 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., |
18086 | ); |
18087 | let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b); |
18088 | let e = _mm256_set_ph( |
18089 | 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16., |
18090 | ); |
18091 | assert_eq_m256h(r, e); |
18092 | } |
18093 | |
18094 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18095 | unsafe fn test_mm256_maskz_mul_ph() { |
18096 | let a = _mm256_set_ph( |
18097 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18098 | ); |
18099 | let b = _mm256_set_ph( |
18100 | 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, |
18101 | ); |
18102 | let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b); |
18103 | let e = _mm256_set_ph( |
18104 | 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16., |
18105 | ); |
18106 | assert_eq_m256h(r, e); |
18107 | } |
18108 | |
18109 | #[simd_test(enable = "avx512fp16" )] |
18110 | unsafe fn test_mm512_mul_ph() { |
18111 | let a = _mm512_set_ph( |
18112 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18113 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18114 | 31.0, 32.0, |
18115 | ); |
18116 | let b = _mm512_set_ph( |
18117 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18118 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18119 | 3.0, 2.0, 1.0, |
18120 | ); |
18121 | let r = _mm512_mul_ph(a, b); |
18122 | let e = _mm512_set_ph( |
18123 | 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, |
18124 | 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, |
18125 | 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, |
18126 | ); |
18127 | assert_eq_m512h(r, e); |
18128 | } |
18129 | |
18130 | #[simd_test(enable = "avx512fp16" )] |
18131 | unsafe fn test_mm512_mask_mul_ph() { |
18132 | let a = _mm512_set_ph( |
18133 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18134 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18135 | 31.0, 32.0, |
18136 | ); |
18137 | let b = _mm512_set_ph( |
18138 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18139 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18140 | 3.0, 2.0, 1.0, |
18141 | ); |
18142 | let src = _mm512_set_ph( |
18143 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
18144 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
18145 | ); |
18146 | let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b); |
18147 | let e = _mm512_set_ph( |
18148 | 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., |
18149 | 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., |
18150 | ); |
18151 | assert_eq_m512h(r, e); |
18152 | } |
18153 | |
18154 | #[simd_test(enable = "avx512fp16" )] |
18155 | unsafe fn test_mm512_maskz_mul_ph() { |
18156 | let a = _mm512_set_ph( |
18157 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18158 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18159 | 31.0, 32.0, |
18160 | ); |
18161 | let b = _mm512_set_ph( |
18162 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18163 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18164 | 3.0, 2.0, 1.0, |
18165 | ); |
18166 | let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b); |
18167 | let e = _mm512_set_ph( |
18168 | 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., |
18169 | 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., |
18170 | ); |
18171 | assert_eq_m512h(r, e); |
18172 | } |
18173 | |
18174 | #[simd_test(enable = "avx512fp16" )] |
18175 | unsafe fn test_mm512_mul_round_ph() { |
18176 | let a = _mm512_set_ph( |
18177 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18178 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18179 | 31.0, 32.0, |
18180 | ); |
18181 | let b = _mm512_set_ph( |
18182 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18183 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18184 | 3.0, 2.0, 1.0, |
18185 | ); |
18186 | let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18187 | let e = _mm512_set_ph( |
18188 | 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0, |
18189 | 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0, |
18190 | 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0, |
18191 | ); |
18192 | assert_eq_m512h(r, e); |
18193 | } |
18194 | |
18195 | #[simd_test(enable = "avx512fp16" )] |
18196 | unsafe fn test_mm512_mask_mul_round_ph() { |
18197 | let a = _mm512_set_ph( |
18198 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18199 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18200 | 31.0, 32.0, |
18201 | ); |
18202 | let b = _mm512_set_ph( |
18203 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18204 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18205 | 3.0, 2.0, 1.0, |
18206 | ); |
18207 | let src = _mm512_set_ph( |
18208 | 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., |
18209 | 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., |
18210 | ); |
18211 | let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18212 | src, |
18213 | 0b01010101010101010101010101010101, |
18214 | a, |
18215 | b, |
18216 | ); |
18217 | let e = _mm512_set_ph( |
18218 | 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272., |
18219 | 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32., |
18220 | ); |
18221 | assert_eq_m512h(r, e); |
18222 | } |
18223 | |
18224 | #[simd_test(enable = "avx512fp16" )] |
18225 | unsafe fn test_mm512_maskz_mul_round_ph() { |
18226 | let a = _mm512_set_ph( |
18227 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
18228 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
18229 | 31.0, 32.0, |
18230 | ); |
18231 | let b = _mm512_set_ph( |
18232 | 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, |
18233 | 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, |
18234 | 3.0, 2.0, 1.0, |
18235 | ); |
18236 | let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18237 | 0b01010101010101010101010101010101, |
18238 | a, |
18239 | b, |
18240 | ); |
18241 | let e = _mm512_set_ph( |
18242 | 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0., |
18243 | 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32., |
18244 | ); |
18245 | assert_eq_m512h(r, e); |
18246 | } |
18247 | |
18248 | #[simd_test(enable = "avx512fp16" )] |
18249 | unsafe fn test_mm_mul_round_sh() { |
18250 | let a = _mm_set_sh(1.0); |
18251 | let b = _mm_set_sh(2.0); |
18252 | let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18253 | let e = _mm_set_sh(2.0); |
18254 | assert_eq_m128h(r, e); |
18255 | } |
18256 | |
18257 | #[simd_test(enable = "avx512fp16" )] |
18258 | unsafe fn test_mm_mask_mul_round_sh() { |
18259 | let a = _mm_set_sh(1.0); |
18260 | let b = _mm_set_sh(2.0); |
18261 | let src = _mm_set_sh(4.0); |
18262 | let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18263 | src, 0, a, b, |
18264 | ); |
18265 | let e = _mm_set_sh(4.0); |
18266 | assert_eq_m128h(r, e); |
18267 | let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18268 | src, 1, a, b, |
18269 | ); |
18270 | let e = _mm_set_sh(2.0); |
18271 | assert_eq_m128h(r, e); |
18272 | } |
18273 | |
18274 | #[simd_test(enable = "avx512fp16" )] |
18275 | unsafe fn test_mm_maskz_mul_round_sh() { |
18276 | let a = _mm_set_sh(1.0); |
18277 | let b = _mm_set_sh(2.0); |
18278 | let r = |
18279 | _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18280 | let e = _mm_set_sh(0.0); |
18281 | assert_eq_m128h(r, e); |
18282 | let r = |
18283 | _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
18284 | let e = _mm_set_sh(2.0); |
18285 | assert_eq_m128h(r, e); |
18286 | } |
18287 | |
18288 | #[simd_test(enable = "avx512fp16" )] |
18289 | unsafe fn test_mm_mul_sh() { |
18290 | let a = _mm_set_sh(1.0); |
18291 | let b = _mm_set_sh(2.0); |
18292 | let r = _mm_mul_sh(a, b); |
18293 | let e = _mm_set_sh(2.0); |
18294 | assert_eq_m128h(r, e); |
18295 | } |
18296 | |
18297 | #[simd_test(enable = "avx512fp16" )] |
18298 | unsafe fn test_mm_mask_mul_sh() { |
18299 | let a = _mm_set_sh(1.0); |
18300 | let b = _mm_set_sh(2.0); |
18301 | let src = _mm_set_sh(4.0); |
18302 | let r = _mm_mask_mul_sh(src, 0, a, b); |
18303 | let e = _mm_set_sh(4.0); |
18304 | assert_eq_m128h(r, e); |
18305 | let r = _mm_mask_mul_sh(src, 1, a, b); |
18306 | let e = _mm_set_sh(2.0); |
18307 | assert_eq_m128h(r, e); |
18308 | } |
18309 | |
18310 | #[simd_test(enable = "avx512fp16" )] |
18311 | unsafe fn test_mm_maskz_mul_sh() { |
18312 | let a = _mm_set_sh(1.0); |
18313 | let b = _mm_set_sh(2.0); |
18314 | let r = _mm_maskz_mul_sh(0, a, b); |
18315 | let e = _mm_set_sh(0.0); |
18316 | assert_eq_m128h(r, e); |
18317 | let r = _mm_maskz_mul_sh(1, a, b); |
18318 | let e = _mm_set_sh(2.0); |
18319 | assert_eq_m128h(r, e); |
18320 | } |
18321 | |
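    // The division tests splat 1.0 and 2.0, so every active lane is simply 0.5; only the
    // masking pattern and the `src` fill values vary between the variants.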
18322 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18323 | unsafe fn test_mm_div_ph() { |
18324 | let a = _mm_set1_ph(1.0); |
18325 | let b = _mm_set1_ph(2.0); |
18326 | let r = _mm_div_ph(a, b); |
18327 | let e = _mm_set1_ph(0.5); |
18328 | assert_eq_m128h(r, e); |
18329 | } |
18330 | |
18331 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18332 | unsafe fn test_mm_mask_div_ph() { |
18333 | let a = _mm_set1_ph(1.0); |
18334 | let b = _mm_set1_ph(2.0); |
18335 | let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0); |
18336 | let r = _mm_mask_div_ph(src, 0b01010101, a, b); |
18337 | let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5); |
18338 | assert_eq_m128h(r, e); |
18339 | } |
18340 | |
18341 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18342 | unsafe fn test_mm_maskz_div_ph() { |
18343 | let a = _mm_set1_ph(1.0); |
18344 | let b = _mm_set1_ph(2.0); |
18345 | let r = _mm_maskz_div_ph(0b01010101, a, b); |
18346 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
18347 | assert_eq_m128h(r, e); |
18348 | } |
18349 | |
18350 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18351 | unsafe fn test_mm256_div_ph() { |
18352 | let a = _mm256_set1_ph(1.0); |
18353 | let b = _mm256_set1_ph(2.0); |
18354 | let r = _mm256_div_ph(a, b); |
18355 | let e = _mm256_set1_ph(0.5); |
18356 | assert_eq_m256h(r, e); |
18357 | } |
18358 | |
18359 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18360 | unsafe fn test_mm256_mask_div_ph() { |
18361 | let a = _mm256_set1_ph(1.0); |
18362 | let b = _mm256_set1_ph(2.0); |
18363 | let src = _mm256_set_ph( |
18364 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18365 | 19.0, |
18366 | ); |
18367 | let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b); |
18368 | let e = _mm256_set_ph( |
18369 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18370 | ); |
18371 | assert_eq_m256h(r, e); |
18372 | } |
18373 | |
18374 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18375 | unsafe fn test_mm256_maskz_div_ph() { |
18376 | let a = _mm256_set1_ph(1.0); |
18377 | let b = _mm256_set1_ph(2.0); |
18378 | let r = _mm256_maskz_div_ph(0b0101010101010101, a, b); |
18379 | let e = _mm256_set_ph( |
18380 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18381 | ); |
18382 | assert_eq_m256h(r, e); |
18383 | } |
18384 | |
18385 | #[simd_test(enable = "avx512fp16" )] |
18386 | unsafe fn test_mm512_div_ph() { |
18387 | let a = _mm512_set1_ph(1.0); |
18388 | let b = _mm512_set1_ph(2.0); |
18389 | let r = _mm512_div_ph(a, b); |
18390 | let e = _mm512_set1_ph(0.5); |
18391 | assert_eq_m512h(r, e); |
18392 | } |
18393 | |
18394 | #[simd_test(enable = "avx512fp16" )] |
18395 | unsafe fn test_mm512_mask_div_ph() { |
18396 | let a = _mm512_set1_ph(1.0); |
18397 | let b = _mm512_set1_ph(2.0); |
18398 | let src = _mm512_set_ph( |
18399 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18400 | 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, |
18401 | 33.0, 34.0, 35.0, |
18402 | ); |
18403 | let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b); |
18404 | let e = _mm512_set_ph( |
18405 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18406 | 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, |
18407 | ); |
18408 | assert_eq_m512h(r, e); |
18409 | } |
18410 | |
18411 | #[simd_test(enable = "avx512fp16" )] |
18412 | unsafe fn test_mm512_maskz_div_ph() { |
18413 | let a = _mm512_set1_ph(1.0); |
18414 | let b = _mm512_set1_ph(2.0); |
18415 | let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b); |
18416 | let e = _mm512_set_ph( |
18417 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
18418 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18419 | ); |
18420 | assert_eq_m512h(r, e); |
18421 | } |
18422 | |
18423 | #[simd_test(enable = "avx512fp16" )] |
18424 | unsafe fn test_mm512_div_round_ph() { |
18425 | let a = _mm512_set1_ph(1.0); |
18426 | let b = _mm512_set1_ph(2.0); |
18427 | let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18428 | let e = _mm512_set1_ph(0.5); |
18429 | assert_eq_m512h(r, e); |
18430 | } |
18431 | |
18432 | #[simd_test(enable = "avx512fp16" )] |
18433 | unsafe fn test_mm512_mask_div_round_ph() { |
18434 | let a = _mm512_set1_ph(1.0); |
18435 | let b = _mm512_set1_ph(2.0); |
18436 | let src = _mm512_set_ph( |
18437 | 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, |
18438 | 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, |
18439 | 33.0, 34.0, 35.0, |
18440 | ); |
18441 | let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18442 | src, |
18443 | 0b01010101010101010101010101010101, |
18444 | a, |
18445 | b, |
18446 | ); |
18447 | let e = _mm512_set_ph( |
18448 | 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5, |
18449 | 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5, |
18450 | ); |
18451 | assert_eq_m512h(r, e); |
18452 | } |
18453 | |
18454 | #[simd_test(enable = "avx512fp16" )] |
18455 | unsafe fn test_mm512_maskz_div_round_ph() { |
18456 | let a = _mm512_set1_ph(1.0); |
18457 | let b = _mm512_set1_ph(2.0); |
18458 | let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18459 | 0b01010101010101010101010101010101, |
18460 | a, |
18461 | b, |
18462 | ); |
18463 | let e = _mm512_set_ph( |
18464 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
18465 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
18466 | ); |
18467 | assert_eq_m512h(r, e); |
18468 | } |
18469 | |
18470 | #[simd_test(enable = "avx512fp16" )] |
18471 | unsafe fn test_mm_div_round_sh() { |
18472 | let a = _mm_set_sh(1.0); |
18473 | let b = _mm_set_sh(2.0); |
18474 | let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18475 | let e = _mm_set_sh(0.5); |
18476 | assert_eq_m128h(r, e); |
18477 | } |
18478 | |
18479 | #[simd_test(enable = "avx512fp16" )] |
18480 | unsafe fn test_mm_mask_div_round_sh() { |
18481 | let a = _mm_set_sh(1.0); |
18482 | let b = _mm_set_sh(2.0); |
18483 | let src = _mm_set_sh(4.0); |
18484 | let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18485 | src, 0, a, b, |
18486 | ); |
18487 | let e = _mm_set_sh(4.0); |
18488 | assert_eq_m128h(r, e); |
18489 | let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18490 | src, 1, a, b, |
18491 | ); |
18492 | let e = _mm_set_sh(0.5); |
18493 | assert_eq_m128h(r, e); |
18494 | } |
18495 | |
18496 | #[simd_test(enable = "avx512fp16" )] |
18497 | unsafe fn test_mm_maskz_div_round_sh() { |
18498 | let a = _mm_set_sh(1.0); |
18499 | let b = _mm_set_sh(2.0); |
18500 | let r = |
18501 | _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18502 | let e = _mm_set_sh(0.0); |
18503 | assert_eq_m128h(r, e); |
18504 | let r = |
18505 | _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
18506 | let e = _mm_set_sh(0.5); |
18507 | assert_eq_m128h(r, e); |
18508 | } |
18509 | |
18510 | #[simd_test(enable = "avx512fp16" )] |
18511 | unsafe fn test_mm_div_sh() { |
18512 | let a = _mm_set_sh(1.0); |
18513 | let b = _mm_set_sh(2.0); |
18514 | let r = _mm_div_sh(a, b); |
18515 | let e = _mm_set_sh(0.5); |
18516 | assert_eq_m128h(r, e); |
18517 | } |
18518 | |
18519 | #[simd_test(enable = "avx512fp16" )] |
18520 | unsafe fn test_mm_mask_div_sh() { |
18521 | let a = _mm_set_sh(1.0); |
18522 | let b = _mm_set_sh(2.0); |
18523 | let src = _mm_set_sh(4.0); |
18524 | let r = _mm_mask_div_sh(src, 0, a, b); |
18525 | let e = _mm_set_sh(4.0); |
18526 | assert_eq_m128h(r, e); |
18527 | let r = _mm_mask_div_sh(src, 1, a, b); |
18528 | let e = _mm_set_sh(0.5); |
18529 | assert_eq_m128h(r, e); |
18530 | } |
18531 | |
18532 | #[simd_test(enable = "avx512fp16" )] |
18533 | unsafe fn test_mm_maskz_div_sh() { |
18534 | let a = _mm_set_sh(1.0); |
18535 | let b = _mm_set_sh(2.0); |
18536 | let r = _mm_maskz_div_sh(0, a, b); |
18537 | let e = _mm_set_sh(0.0); |
18538 | assert_eq_m128h(r, e); |
18539 | let r = _mm_maskz_div_sh(1, a, b); |
18540 | let e = _mm_set_sh(0.5); |
18541 | assert_eq_m128h(r, e); |
18542 | } |
18543 | |
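    // Packed complex (`pch`) tests: each complex element is a pair of f16 lanes, with the real
    // part in the even lane and the imaginary part in the odd lane, so a __m128h holds 4
    // complex values and each mask bit covers one (real, imaginary) pair. The inputs below are
    // all 0.0 + 1.0i, and i * i = -1, hence the expected (-1.0, 0.0) pairs.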
18544 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18545 | unsafe fn test_mm_mul_pch() { |
18546 | let a = _mm_set1_pch(0.0, 1.0); |
18547 | let b = _mm_set1_pch(0.0, 1.0); |
18548 | let r = _mm_mul_pch(a, b); |
18549 | let e = _mm_set1_pch(-1.0, 0.0); |
18550 | assert_eq_m128h(r, e); |
18551 | } |
18552 | |
18553 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18554 | unsafe fn test_mm_mask_mul_pch() { |
18555 | let a = _mm_set1_pch(0.0, 1.0); |
18556 | let b = _mm_set1_pch(0.0, 1.0); |
18557 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18558 | let r = _mm_mask_mul_pch(src, 0b0101, a, b); |
18559 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18560 | assert_eq_m128h(r, e); |
18561 | } |
18562 | |
18563 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18564 | unsafe fn test_mm_maskz_mul_pch() { |
18565 | let a = _mm_set1_pch(0.0, 1.0); |
18566 | let b = _mm_set1_pch(0.0, 1.0); |
18567 | let r = _mm_maskz_mul_pch(0b0101, a, b); |
18568 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18569 | assert_eq_m128h(r, e); |
18570 | } |
18571 | |
18572 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18573 | unsafe fn test_mm256_mul_pch() { |
18574 | let a = _mm256_set1_pch(0.0, 1.0); |
18575 | let b = _mm256_set1_pch(0.0, 1.0); |
18576 | let r = _mm256_mul_pch(a, b); |
18577 | let e = _mm256_set1_pch(-1.0, 0.0); |
18578 | assert_eq_m256h(r, e); |
18579 | } |
18580 | |
18581 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18582 | unsafe fn test_mm256_mask_mul_pch() { |
18583 | let a = _mm256_set1_pch(0.0, 1.0); |
18584 | let b = _mm256_set1_pch(0.0, 1.0); |
18585 | let src = _mm256_setr_ph( |
18586 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18587 | ); |
18588 | let r = _mm256_mask_mul_pch(src, 0b01010101, a, b); |
18589 | let e = _mm256_setr_ph( |
18590 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18591 | ); |
18592 | assert_eq_m256h(r, e); |
18593 | } |
18594 | |
18595 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18596 | unsafe fn test_mm256_maskz_mul_pch() { |
18597 | let a = _mm256_set1_pch(0.0, 1.0); |
18598 | let b = _mm256_set1_pch(0.0, 1.0); |
18599 | let r = _mm256_maskz_mul_pch(0b01010101, a, b); |
18600 | let e = _mm256_setr_ph( |
18601 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18602 | ); |
18603 | assert_eq_m256h(r, e); |
18604 | } |
18605 | |
18606 | #[simd_test(enable = "avx512fp16" )] |
18607 | unsafe fn test_mm512_mul_pch() { |
18608 | let a = _mm512_set1_pch(0.0, 1.0); |
18609 | let b = _mm512_set1_pch(0.0, 1.0); |
18610 | let r = _mm512_mul_pch(a, b); |
18611 | let e = _mm512_set1_pch(-1.0, 0.0); |
18612 | assert_eq_m512h(r, e); |
18613 | } |
18614 | |
18615 | #[simd_test(enable = "avx512fp16" )] |
18616 | unsafe fn test_mm512_mask_mul_pch() { |
18617 | let a = _mm512_set1_pch(0.0, 1.0); |
18618 | let b = _mm512_set1_pch(0.0, 1.0); |
18619 | let src = _mm512_setr_ph( |
18620 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18621 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18622 | 32.0, 33.0, |
18623 | ); |
18624 | let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b); |
18625 | let e = _mm512_setr_ph( |
18626 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18627 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18628 | 33.0, |
18629 | ); |
18630 | assert_eq_m512h(r, e); |
18631 | } |
18632 | |
18633 | #[simd_test(enable = "avx512fp16" )] |
18634 | unsafe fn test_mm512_maskz_mul_pch() { |
18635 | let a = _mm512_set1_pch(0.0, 1.0); |
18636 | let b = _mm512_set1_pch(0.0, 1.0); |
18637 | let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b); |
18638 | let e = _mm512_setr_ph( |
18639 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18640 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18641 | ); |
18642 | assert_eq_m512h(r, e); |
18643 | } |
18644 | |
18645 | #[simd_test(enable = "avx512fp16" )] |
18646 | unsafe fn test_mm512_mul_round_pch() { |
18647 | let a = _mm512_set1_pch(0.0, 1.0); |
18648 | let b = _mm512_set1_pch(0.0, 1.0); |
18649 | let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18650 | let e = _mm512_set1_pch(-1.0, 0.0); |
18651 | assert_eq_m512h(r, e); |
18652 | } |
18653 | |
18654 | #[simd_test(enable = "avx512fp16" )] |
18655 | unsafe fn test_mm512_mask_mul_round_pch() { |
18656 | let a = _mm512_set1_pch(0.0, 1.0); |
18657 | let b = _mm512_set1_pch(0.0, 1.0); |
18658 | let src = _mm512_setr_ph( |
18659 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18660 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18661 | 32.0, 33.0, |
18662 | ); |
18663 | let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18664 | src, |
18665 | 0b0101010101010101, |
18666 | a, |
18667 | b, |
18668 | ); |
18669 | let e = _mm512_setr_ph( |
18670 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18671 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18672 | 33.0, |
18673 | ); |
18674 | assert_eq_m512h(r, e); |
18675 | } |
18676 | |
18677 | #[simd_test(enable = "avx512fp16" )] |
18678 | unsafe fn test_mm512_maskz_mul_round_pch() { |
18679 | let a = _mm512_set1_pch(0.0, 1.0); |
18680 | let b = _mm512_set1_pch(0.0, 1.0); |
18681 | let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18682 | 0b0101010101010101, |
18683 | a, |
18684 | b, |
18685 | ); |
18686 | let e = _mm512_setr_ph( |
18687 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18688 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18689 | ); |
18690 | assert_eq_m512h(r, e); |
18691 | } |
18692 | |
18693 | #[simd_test(enable = "avx512fp16" )] |
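    // Scalar complex (`sch`) tests: only the lowest (real, imaginary) pair is computed; the
    // remaining upper lanes are copied from `a`. In the masked forms a clear mask bit takes
    // the low pair from `src` (or zero for the `maskz` forms) while the upper lanes still
    // come from `a`.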
18694 | unsafe fn test_mm_mul_round_sch() { |
18695 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18696 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18697 | let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18698 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18699 | assert_eq_m128h(r, e); |
18700 | } |
18701 | |
18702 | #[simd_test(enable = "avx512fp16" )] |
18703 | unsafe fn test_mm_mask_mul_round_sch() { |
18704 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18705 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18706 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18707 | let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18708 | src, 0, a, b, |
18709 | ); |
18710 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18711 | assert_eq_m128h(r, e); |
18712 | } |
18713 | |
18714 | #[simd_test(enable = "avx512fp16" )] |
18715 | unsafe fn test_mm_maskz_mul_round_sch() { |
18716 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18717 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18718 | let r = |
18719 | _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18720 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18721 | assert_eq_m128h(r, e); |
18722 | } |
18723 | |
18724 | #[simd_test(enable = "avx512fp16" )] |
18725 | unsafe fn test_mm_mul_sch() { |
18726 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18727 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18728 | let r = _mm_mul_sch(a, b); |
18729 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18730 | assert_eq_m128h(r, e); |
18731 | } |
18732 | |
18733 | #[simd_test(enable = "avx512fp16" )] |
18734 | unsafe fn test_mm_mask_mul_sch() { |
18735 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18736 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18737 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18738 | let r = _mm_mask_mul_sch(src, 0, a, b); |
18739 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18740 | assert_eq_m128h(r, e); |
18741 | } |
18742 | |
18743 | #[simd_test(enable = "avx512fp16" )] |
18744 | unsafe fn test_mm_maskz_mul_sch() { |
18745 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18746 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18747 | let r = _mm_maskz_mul_sch(0, a, b); |
18748 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18749 | assert_eq_m128h(r, e); |
18750 | } |
18751 | |
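    // The `fmul_pch`/`fmul_sch` intrinsics perform the same complex multiplication as the
    // `mul_pch`/`mul_sch` ones, so the tests below repeat the cases above with identical
    // inputs and expected results.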
18752 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18753 | unsafe fn test_mm_fmul_pch() { |
18754 | let a = _mm_set1_pch(0.0, 1.0); |
18755 | let b = _mm_set1_pch(0.0, 1.0); |
18756 | let r = _mm_fmul_pch(a, b); |
18757 | let e = _mm_set1_pch(-1.0, 0.0); |
18758 | assert_eq_m128h(r, e); |
18759 | } |
18760 | |
18761 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18762 | unsafe fn test_mm_mask_fmul_pch() { |
18763 | let a = _mm_set1_pch(0.0, 1.0); |
18764 | let b = _mm_set1_pch(0.0, 1.0); |
18765 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18766 | let r = _mm_mask_fmul_pch(src, 0b0101, a, b); |
18767 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18768 | assert_eq_m128h(r, e); |
18769 | } |
18770 | |
18771 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18772 | unsafe fn test_mm_maskz_fmul_pch() { |
18773 | let a = _mm_set1_pch(0.0, 1.0); |
18774 | let b = _mm_set1_pch(0.0, 1.0); |
18775 | let r = _mm_maskz_fmul_pch(0b0101, a, b); |
18776 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18777 | assert_eq_m128h(r, e); |
18778 | } |
18779 | |
18780 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18781 | unsafe fn test_mm256_fmul_pch() { |
18782 | let a = _mm256_set1_pch(0.0, 1.0); |
18783 | let b = _mm256_set1_pch(0.0, 1.0); |
18784 | let r = _mm256_fmul_pch(a, b); |
18785 | let e = _mm256_set1_pch(-1.0, 0.0); |
18786 | assert_eq_m256h(r, e); |
18787 | } |
18788 | |
18789 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18790 | unsafe fn test_mm256_mask_fmul_pch() { |
18791 | let a = _mm256_set1_pch(0.0, 1.0); |
18792 | let b = _mm256_set1_pch(0.0, 1.0); |
18793 | let src = _mm256_setr_ph( |
18794 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18795 | ); |
18796 | let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b); |
18797 | let e = _mm256_setr_ph( |
18798 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18799 | ); |
18800 | assert_eq_m256h(r, e); |
18801 | } |
18802 | |
18803 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18804 | unsafe fn test_mm256_maskz_fmul_pch() { |
18805 | let a = _mm256_set1_pch(0.0, 1.0); |
18806 | let b = _mm256_set1_pch(0.0, 1.0); |
18807 | let r = _mm256_maskz_fmul_pch(0b01010101, a, b); |
18808 | let e = _mm256_setr_ph( |
18809 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18810 | ); |
18811 | assert_eq_m256h(r, e); |
18812 | } |
18813 | |
18814 | #[simd_test(enable = "avx512fp16" )] |
18815 | unsafe fn test_mm512_fmul_pch() { |
18816 | let a = _mm512_set1_pch(0.0, 1.0); |
18817 | let b = _mm512_set1_pch(0.0, 1.0); |
18818 | let r = _mm512_fmul_pch(a, b); |
18819 | let e = _mm512_set1_pch(-1.0, 0.0); |
18820 | assert_eq_m512h(r, e); |
18821 | } |
18822 | |
18823 | #[simd_test(enable = "avx512fp16" )] |
18824 | unsafe fn test_mm512_mask_fmul_pch() { |
18825 | let a = _mm512_set1_pch(0.0, 1.0); |
18826 | let b = _mm512_set1_pch(0.0, 1.0); |
18827 | let src = _mm512_setr_ph( |
18828 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18829 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18830 | 32.0, 33.0, |
18831 | ); |
18832 | let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b); |
18833 | let e = _mm512_setr_ph( |
18834 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18835 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18836 | 33.0, |
18837 | ); |
18838 | assert_eq_m512h(r, e); |
18839 | } |
18840 | |
18841 | #[simd_test(enable = "avx512fp16" )] |
18842 | unsafe fn test_mm512_maskz_fmul_pch() { |
18843 | let a = _mm512_set1_pch(0.0, 1.0); |
18844 | let b = _mm512_set1_pch(0.0, 1.0); |
18845 | let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b); |
18846 | let e = _mm512_setr_ph( |
18847 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18848 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18849 | ); |
18850 | assert_eq_m512h(r, e); |
18851 | } |
18852 | |
18853 | #[simd_test(enable = "avx512fp16" )] |
18854 | unsafe fn test_mm512_fmul_round_pch() { |
18855 | let a = _mm512_set1_pch(0.0, 1.0); |
18856 | let b = _mm512_set1_pch(0.0, 1.0); |
18857 | let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18858 | let e = _mm512_set1_pch(-1.0, 0.0); |
18859 | assert_eq_m512h(r, e); |
18860 | } |
18861 | |
18862 | #[simd_test(enable = "avx512fp16" )] |
18863 | unsafe fn test_mm512_mask_fmul_round_pch() { |
18864 | let a = _mm512_set1_pch(0.0, 1.0); |
18865 | let b = _mm512_set1_pch(0.0, 1.0); |
18866 | let src = _mm512_setr_ph( |
18867 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
18868 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
18869 | 32.0, 33.0, |
18870 | ); |
18871 | let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18872 | src, |
18873 | 0b0101010101010101, |
18874 | a, |
18875 | b, |
18876 | ); |
18877 | let e = _mm512_setr_ph( |
18878 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
18879 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
18880 | 33.0, |
18881 | ); |
18882 | assert_eq_m512h(r, e); |
18883 | } |
18884 | |
18885 | #[simd_test(enable = "avx512fp16" )] |
18886 | unsafe fn test_mm512_maskz_fmul_round_pch() { |
18887 | let a = _mm512_set1_pch(0.0, 1.0); |
18888 | let b = _mm512_set1_pch(0.0, 1.0); |
18889 | let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18890 | 0b0101010101010101, |
18891 | a, |
18892 | b, |
18893 | ); |
18894 | let e = _mm512_setr_ph( |
18895 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18896 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
18897 | ); |
18898 | assert_eq_m512h(r, e); |
18899 | } |
18900 | |
18901 | #[simd_test(enable = "avx512fp16" )] |
18902 | unsafe fn test_mm_fmul_round_sch() { |
18903 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18904 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18905 | let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
18906 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18907 | assert_eq_m128h(r, e); |
18908 | } |
18909 | |
18910 | #[simd_test(enable = "avx512fp16" )] |
18911 | unsafe fn test_mm_mask_fmul_round_sch() { |
18912 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18913 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18914 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18915 | let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
18916 | src, 0, a, b, |
18917 | ); |
18918 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18919 | assert_eq_m128h(r, e); |
18920 | } |
18921 | |
18922 | #[simd_test(enable = "avx512fp16" )] |
18923 | unsafe fn test_mm_maskz_fmul_round_sch() { |
18924 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18925 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18926 | let r = |
18927 | _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
18928 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18929 | assert_eq_m128h(r, e); |
18930 | } |
18931 | |
18932 | #[simd_test(enable = "avx512fp16" )] |
18933 | unsafe fn test_mm_fmul_sch() { |
18934 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18935 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18936 | let r = _mm_fmul_sch(a, b); |
18937 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18938 | assert_eq_m128h(r, e); |
18939 | } |
18940 | |
18941 | #[simd_test(enable = "avx512fp16" )] |
18942 | unsafe fn test_mm_mask_fmul_sch() { |
18943 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18944 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18945 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
18946 | let r = _mm_mask_fmul_sch(src, 0, a, b); |
18947 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18948 | assert_eq_m128h(r, e); |
18949 | } |
18950 | |
18951 | #[simd_test(enable = "avx512fp16" )] |
18952 | unsafe fn test_mm_maskz_fmul_sch() { |
18953 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18954 | let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
18955 | let r = _mm_maskz_fmul_sch(0, a, b); |
18956 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
18957 | assert_eq_m128h(r, e); |
18958 | } |
18959 | |
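    // The `cmul`/`fcmul` tests multiply by the complex conjugate of `b`: with a = i and
    // b = -i, conj(b) = i, so a * conj(b) = i * i = -1, giving the same (-1.0, 0.0) pairs
    // as the plain complex multiplies above.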
18960 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18961 | unsafe fn test_mm_cmul_pch() { |
18962 | let a = _mm_set1_pch(0.0, 1.0); |
18963 | let b = _mm_set1_pch(0.0, -1.0); |
18964 | let r = _mm_cmul_pch(a, b); |
18965 | let e = _mm_set1_pch(-1.0, 0.0); |
18966 | assert_eq_m128h(r, e); |
18967 | } |
18968 | |
18969 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18970 | unsafe fn test_mm_mask_cmul_pch() { |
18971 | let a = _mm_set1_pch(0.0, 1.0); |
18972 | let b = _mm_set1_pch(0.0, -1.0); |
18973 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
18974 | let r = _mm_mask_cmul_pch(src, 0b0101, a, b); |
18975 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
18976 | assert_eq_m128h(r, e); |
18977 | } |
18978 | |
18979 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18980 | unsafe fn test_mm_maskz_cmul_pch() { |
18981 | let a = _mm_set1_pch(0.0, 1.0); |
18982 | let b = _mm_set1_pch(0.0, -1.0); |
18983 | let r = _mm_maskz_cmul_pch(0b0101, a, b); |
18984 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
18985 | assert_eq_m128h(r, e); |
18986 | } |
18987 | |
18988 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18989 | unsafe fn test_mm256_cmul_pch() { |
18990 | let a = _mm256_set1_pch(0.0, 1.0); |
18991 | let b = _mm256_set1_pch(0.0, -1.0); |
18992 | let r = _mm256_cmul_pch(a, b); |
18993 | let e = _mm256_set1_pch(-1.0, 0.0); |
18994 | assert_eq_m256h(r, e); |
18995 | } |
18996 | |
18997 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
18998 | unsafe fn test_mm256_mask_cmul_pch() { |
18999 | let a = _mm256_set1_pch(0.0, 1.0); |
19000 | let b = _mm256_set1_pch(0.0, -1.0); |
19001 | let src = _mm256_setr_ph( |
19002 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19003 | ); |
19004 | let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b); |
19005 | let e = _mm256_setr_ph( |
19006 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19007 | ); |
19008 | assert_eq_m256h(r, e); |
19009 | } |
19010 | |
19011 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19012 | unsafe fn test_mm256_maskz_cmul_pch() { |
19013 | let a = _mm256_set1_pch(0.0, 1.0); |
19014 | let b = _mm256_set1_pch(0.0, -1.0); |
19015 | let r = _mm256_maskz_cmul_pch(0b01010101, a, b); |
19016 | let e = _mm256_setr_ph( |
19017 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19018 | ); |
19019 | assert_eq_m256h(r, e); |
19020 | } |
19021 | |
19022 | #[simd_test(enable = "avx512fp16" )] |
19023 | unsafe fn test_mm512_cmul_pch() { |
19024 | let a = _mm512_set1_pch(0.0, 1.0); |
19025 | let b = _mm512_set1_pch(0.0, -1.0); |
19026 | let r = _mm512_cmul_pch(a, b); |
19027 | let e = _mm512_set1_pch(-1.0, 0.0); |
19028 | assert_eq_m512h(r, e); |
19029 | } |
19030 | |
19031 | #[simd_test(enable = "avx512fp16" )] |
19032 | unsafe fn test_mm512_mask_cmul_pch() { |
19033 | let a = _mm512_set1_pch(0.0, 1.0); |
19034 | let b = _mm512_set1_pch(0.0, -1.0); |
19035 | let src = _mm512_setr_ph( |
19036 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19037 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19038 | 32.0, 33.0, |
19039 | ); |
19040 | let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b); |
19041 | let e = _mm512_setr_ph( |
19042 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19043 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19044 | 33.0, |
19045 | ); |
19046 | assert_eq_m512h(r, e); |
19047 | } |
19048 | |
19049 | #[simd_test(enable = "avx512fp16" )] |
19050 | unsafe fn test_mm512_maskz_cmul_pch() { |
19051 | let a = _mm512_set1_pch(0.0, 1.0); |
19052 | let b = _mm512_set1_pch(0.0, -1.0); |
19053 | let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b); |
19054 | let e = _mm512_setr_ph( |
19055 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19056 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19057 | ); |
19058 | assert_eq_m512h(r, e); |
19059 | } |
19060 | |
19061 | #[simd_test(enable = "avx512fp16" )] |
19062 | unsafe fn test_mm512_cmul_round_pch() { |
19063 | let a = _mm512_set1_pch(0.0, 1.0); |
19064 | let b = _mm512_set1_pch(0.0, -1.0); |
19065 | let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19066 | let e = _mm512_set1_pch(-1.0, 0.0); |
19067 | assert_eq_m512h(r, e); |
19068 | } |
19069 | |
19070 | #[simd_test(enable = "avx512fp16" )] |
19071 | unsafe fn test_mm512_mask_cmul_round_pch() { |
19072 | let a = _mm512_set1_pch(0.0, 1.0); |
19073 | let b = _mm512_set1_pch(0.0, -1.0); |
19074 | let src = _mm512_setr_ph( |
19075 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19076 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19077 | 32.0, 33.0, |
19078 | ); |
19079 | let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19080 | src, |
19081 | 0b0101010101010101, |
19082 | a, |
19083 | b, |
19084 | ); |
19085 | let e = _mm512_setr_ph( |
19086 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19087 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19088 | 33.0, |
19089 | ); |
19090 | assert_eq_m512h(r, e); |
19091 | } |
19092 | |
19093 | #[simd_test(enable = "avx512fp16" )] |
19094 | unsafe fn test_mm512_maskz_cmul_round_pch() { |
19095 | let a = _mm512_set1_pch(0.0, 1.0); |
19096 | let b = _mm512_set1_pch(0.0, -1.0); |
19097 | let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19098 | 0b0101010101010101, |
19099 | a, |
19100 | b, |
19101 | ); |
19102 | let e = _mm512_setr_ph( |
19103 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19104 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19105 | ); |
19106 | assert_eq_m512h(r, e); |
19107 | } |
19108 | |
19109 | #[simd_test(enable = "avx512fp16" )] |
19110 | unsafe fn test_mm_cmul_sch() { |
19111 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19112 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19113 | let r = _mm_cmul_sch(a, b); |
19114 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19115 | assert_eq_m128h(r, e); |
19116 | } |
19117 | |
19118 | #[simd_test(enable = "avx512fp16" )] |
19119 | unsafe fn test_mm_mask_cmul_sch() { |
19120 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19121 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19122 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19123 | let r = _mm_mask_cmul_sch(src, 0, a, b); |
19124 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19125 | assert_eq_m128h(r, e); |
19126 | } |
19127 | |
19128 | #[simd_test(enable = "avx512fp16" )] |
19129 | unsafe fn test_mm_maskz_cmul_sch() { |
19130 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19131 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19132 | let r = _mm_maskz_cmul_sch(0, a, b); |
19133 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19134 | assert_eq_m128h(r, e); |
19135 | } |
19136 | |
19137 | #[simd_test(enable = "avx512fp16" )] |
19138 | unsafe fn test_mm_cmul_round_sch() { |
19139 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19140 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19141 | let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19142 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19143 | assert_eq_m128h(r, e); |
19144 | } |
19145 | |
19146 | #[simd_test(enable = "avx512fp16" )] |
19147 | unsafe fn test_mm_mask_cmul_round_sch() { |
19148 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19149 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19150 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19151 | let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19152 | src, 0, a, b, |
19153 | ); |
19154 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19155 | assert_eq_m128h(r, e); |
19156 | } |
19157 | |
19158 | #[simd_test(enable = "avx512fp16" )] |
19159 | unsafe fn test_mm_maskz_cmul_round_sch() { |
19160 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19161 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19162 | let r = |
19163 | _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
19164 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19165 | assert_eq_m128h(r, e); |
19166 | } |
19167 | |
19168 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19169 | unsafe fn test_mm_fcmul_pch() { |
19170 | let a = _mm_set1_pch(0.0, 1.0); |
19171 | let b = _mm_set1_pch(0.0, -1.0); |
19172 | let r = _mm_fcmul_pch(a, b); |
19173 | let e = _mm_set1_pch(-1.0, 0.0); |
19174 | assert_eq_m128h(r, e); |
19175 | } |
19176 | |
19177 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19178 | unsafe fn test_mm_mask_fcmul_pch() { |
19179 | let a = _mm_set1_pch(0.0, 1.0); |
19180 | let b = _mm_set1_pch(0.0, -1.0); |
19181 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
19182 | let r = _mm_mask_fcmul_pch(src, 0b0101, a, b); |
19183 | let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0); |
19184 | assert_eq_m128h(r, e); |
19185 | } |
19186 | |
19187 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19188 | unsafe fn test_mm_maskz_fcmul_pch() { |
19189 | let a = _mm_set1_pch(0.0, 1.0); |
19190 | let b = _mm_set1_pch(0.0, -1.0); |
19191 | let r = _mm_maskz_fcmul_pch(0b0101, a, b); |
19192 | let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0); |
19193 | assert_eq_m128h(r, e); |
19194 | } |
19195 | |
19196 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19197 | unsafe fn test_mm256_fcmul_pch() { |
19198 | let a = _mm256_set1_pch(0.0, 1.0); |
19199 | let b = _mm256_set1_pch(0.0, -1.0); |
19200 | let r = _mm256_fcmul_pch(a, b); |
19201 | let e = _mm256_set1_pch(-1.0, 0.0); |
19202 | assert_eq_m256h(r, e); |
19203 | } |
19204 | |
19205 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19206 | unsafe fn test_mm256_mask_fcmul_pch() { |
19207 | let a = _mm256_set1_pch(0.0, 1.0); |
19208 | let b = _mm256_set1_pch(0.0, -1.0); |
19209 | let src = _mm256_setr_ph( |
19210 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19211 | ); |
19212 | let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b); |
19213 | let e = _mm256_setr_ph( |
19214 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19215 | ); |
19216 | assert_eq_m256h(r, e); |
19217 | } |
19218 | |
19219 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19220 | unsafe fn test_mm256_maskz_fcmul_pch() { |
19221 | let a = _mm256_set1_pch(0.0, 1.0); |
19222 | let b = _mm256_set1_pch(0.0, -1.0); |
19223 | let r = _mm256_maskz_fcmul_pch(0b01010101, a, b); |
19224 | let e = _mm256_setr_ph( |
19225 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19226 | ); |
19227 | assert_eq_m256h(r, e); |
19228 | } |
19229 | |
19230 | #[simd_test(enable = "avx512fp16" )] |
19231 | unsafe fn test_mm512_fcmul_pch() { |
19232 | let a = _mm512_set1_pch(0.0, 1.0); |
19233 | let b = _mm512_set1_pch(0.0, -1.0); |
19234 | let r = _mm512_fcmul_pch(a, b); |
19235 | let e = _mm512_set1_pch(-1.0, 0.0); |
19236 | assert_eq_m512h(r, e); |
19237 | } |
19238 | |
19239 | #[simd_test(enable = "avx512fp16" )] |
19240 | unsafe fn test_mm512_mask_fcmul_pch() { |
19241 | let a = _mm512_set1_pch(0.0, 1.0); |
19242 | let b = _mm512_set1_pch(0.0, -1.0); |
19243 | let src = _mm512_setr_ph( |
19244 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19245 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19246 | 32.0, 33.0, |
19247 | ); |
19248 | let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b); |
19249 | let e = _mm512_setr_ph( |
19250 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19251 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19252 | 33.0, |
19253 | ); |
19254 | assert_eq_m512h(r, e); |
19255 | } |
19256 | |
19257 | #[simd_test(enable = "avx512fp16" )] |
19258 | unsafe fn test_mm512_maskz_fcmul_pch() { |
19259 | let a = _mm512_set1_pch(0.0, 1.0); |
19260 | let b = _mm512_set1_pch(0.0, -1.0); |
19261 | let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b); |
19262 | let e = _mm512_setr_ph( |
19263 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19264 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19265 | ); |
19266 | assert_eq_m512h(r, e); |
19267 | } |
19268 | |
19269 | #[simd_test(enable = "avx512fp16" )] |
19270 | unsafe fn test_mm512_fcmul_round_pch() { |
19271 | let a = _mm512_set1_pch(0.0, 1.0); |
19272 | let b = _mm512_set1_pch(0.0, -1.0); |
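// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even and suppresses exceptions (SAE).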
19273 | let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19274 | let e = _mm512_set1_pch(-1.0, 0.0); |
19275 | assert_eq_m512h(r, e); |
19276 | } |
19277 | |
19278 | #[simd_test(enable = "avx512fp16" )] |
19279 | unsafe fn test_mm512_mask_fcmul_round_pch() { |
19280 | let a = _mm512_set1_pch(0.0, 1.0); |
19281 | let b = _mm512_set1_pch(0.0, -1.0); |
19282 | let src = _mm512_setr_ph( |
19283 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19284 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19285 | 32.0, 33.0, |
19286 | ); |
19287 | let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19288 | src, |
19289 | 0b0101010101010101, |
19290 | a, |
19291 | b, |
19292 | ); |
19293 | let e = _mm512_setr_ph( |
19294 | -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0, |
19295 | -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0, |
19296 | 33.0, |
19297 | ); |
19298 | assert_eq_m512h(r, e); |
19299 | } |
19300 | |
19301 | #[simd_test(enable = "avx512fp16" )] |
19302 | unsafe fn test_mm512_maskz_fcmul_round_pch() { |
19303 | let a = _mm512_set1_pch(0.0, 1.0); |
19304 | let b = _mm512_set1_pch(0.0, -1.0); |
19305 | let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19306 | 0b0101010101010101, |
19307 | a, |
19308 | b, |
19309 | ); |
19310 | let e = _mm512_setr_ph( |
19311 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19312 | -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, |
19313 | ); |
19314 | assert_eq_m512h(r, e); |
19315 | } |
19316 | |
19317 | #[simd_test(enable = "avx512fp16" )] |
19318 | unsafe fn test_mm_fcmul_sch() { |
19319 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19320 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
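// Scalar complex variant: only the lowest pair is computed, a * conj(b) = -1+0i;
// the remaining six elements are copied from a.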
19321 | let r = _mm_fcmul_sch(a, b); |
19322 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19323 | assert_eq_m128h(r, e); |
19324 | } |
19325 | |
19326 | #[simd_test(enable = "avx512fp16" )] |
19327 | unsafe fn test_mm_mask_fcmul_sch() { |
19328 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19329 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19330 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19331 | let r = _mm_mask_fcmul_sch(src, 0, a, b); |
19332 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19333 | assert_eq_m128h(r, e); |
19334 | } |
19335 | |
19336 | #[simd_test(enable = "avx512fp16" )] |
19337 | unsafe fn test_mm_maskz_fcmul_sch() { |
19338 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19339 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19340 | let r = _mm_maskz_fcmul_sch(0, a, b); |
19341 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19342 | assert_eq_m128h(r, e); |
19343 | } |
19344 | |
19345 | #[simd_test(enable = "avx512fp16" )] |
19346 | unsafe fn test_mm_fcmul_round_sch() { |
19347 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19348 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19349 | let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
19350 | let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19351 | assert_eq_m128h(r, e); |
19352 | } |
19353 | |
19354 | #[simd_test(enable = "avx512fp16" )] |
19355 | unsafe fn test_mm_mask_fcmul_round_sch() { |
19356 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19357 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19358 | let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0); |
19359 | let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19360 | src, 0, a, b, |
19361 | ); |
19362 | let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19363 | assert_eq_m128h(r, e); |
19364 | } |
19365 | |
19366 | #[simd_test(enable = "avx512fp16" )] |
19367 | unsafe fn test_mm_maskz_fcmul_round_sch() { |
19368 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19369 | let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0); |
19370 | let r = |
19371 | _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
19372 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19373 | assert_eq_m128h(r, e); |
19374 | } |
19375 | |
19376 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19377 | unsafe fn test_mm_abs_ph() { |
19378 | let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0); |
19379 | let r = _mm_abs_ph(a); |
19380 | let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0); |
19381 | assert_eq_m128h(r, e); |
19382 | } |
19383 | |
19384 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19385 | unsafe fn test_mm256_abs_ph() { |
19386 | let a = _mm256_set_ph( |
19387 | -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, |
19388 | -14.0, |
19389 | ); |
19390 | let r = _mm256_abs_ph(a); |
19391 | let e = _mm256_set_ph( |
19392 | 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, |
19393 | ); |
19394 | assert_eq_m256h(r, e); |
19395 | } |
19396 | |
19397 | #[simd_test(enable = "avx512fp16" )] |
19398 | unsafe fn test_mm512_abs_ph() { |
19399 | let a = _mm512_set_ph( |
19400 | -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, |
19401 | -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, |
19402 | 27.0, -28.0, 29.0, -30.0, |
19403 | ); |
19404 | let r = _mm512_abs_ph(a); |
19405 | let e = _mm512_set_ph( |
19406 | 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, |
19407 | 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, |
19408 | 29.0, 30.0, |
19409 | ); |
19410 | assert_eq_m512h(r, e); |
19411 | } |
19412 | |
19413 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19414 | unsafe fn test_mm_conj_pch() { |
19415 | let a = _mm_set1_pch(0.0, 1.0); |
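// Conjugation flips the sign of the imaginary (odd-indexed) elements: 0+i becomes 0-i.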
19416 | let r = _mm_conj_pch(a); |
19417 | let e = _mm_set1_pch(0.0, -1.0); |
19418 | assert_eq_m128h(r, e); |
19419 | } |
19420 | |
19421 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19422 | unsafe fn test_mm_mask_conj_pch() { |
19423 | let a = _mm_set1_pch(0.0, 1.0); |
19424 | let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0); |
19425 | let r = _mm_mask_conj_pch(src, 0b0101, a); |
19426 | let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0); |
19427 | assert_eq_m128h(r, e); |
19428 | } |
19429 | |
19430 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19431 | unsafe fn test_mm_maskz_conj_pch() { |
19432 | let a = _mm_set1_pch(0.0, 1.0); |
19433 | let r = _mm_maskz_conj_pch(0b0101, a); |
19434 | let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0); |
19435 | assert_eq_m128h(r, e); |
19436 | } |
19437 | |
19438 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19439 | unsafe fn test_mm256_conj_pch() { |
19440 | let a = _mm256_set1_pch(0.0, 1.0); |
19441 | let r = _mm256_conj_pch(a); |
19442 | let e = _mm256_set1_pch(0.0, -1.0); |
19443 | assert_eq_m256h(r, e); |
19444 | } |
19445 | |
19446 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19447 | unsafe fn test_mm256_mask_conj_pch() { |
19448 | let a = _mm256_set1_pch(0.0, 1.0); |
19449 | let src = _mm256_setr_ph( |
19450 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19451 | ); |
19452 | let r = _mm256_mask_conj_pch(src, 0b01010101, a); |
19453 | let e = _mm256_setr_ph( |
19454 | 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, |
19455 | ); |
19456 | assert_eq_m256h(r, e); |
19457 | } |
19458 | |
19459 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19460 | unsafe fn test_mm256_maskz_conj_pch() { |
19461 | let a = _mm256_set1_pch(0.0, 1.0); |
19462 | let r = _mm256_maskz_conj_pch(0b01010101, a); |
19463 | let e = _mm256_setr_ph( |
19464 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19465 | ); |
19466 | assert_eq_m256h(r, e); |
19467 | } |
19468 | |
19469 | #[simd_test(enable = "avx512fp16" )] |
19470 | unsafe fn test_mm512_conj_pch() { |
19471 | let a = _mm512_set1_pch(0.0, 1.0); |
19472 | let r = _mm512_conj_pch(a); |
19473 | let e = _mm512_set1_pch(0.0, -1.0); |
19474 | assert_eq_m512h(r, e); |
19475 | } |
19476 | |
19477 | #[simd_test(enable = "avx512fp16" )] |
19478 | unsafe fn test_mm512_mask_conj_pch() { |
19479 | let a = _mm512_set1_pch(0.0, 1.0); |
19480 | let src = _mm512_setr_ph( |
19481 | 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, |
19482 | 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, |
19483 | 32.0, 33.0, |
19484 | ); |
19485 | let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a); |
19486 | let e = _mm512_setr_ph( |
19487 | 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0, |
19488 | 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0, |
19489 | 33.0, |
19490 | ); |
19491 | assert_eq_m512h(r, e); |
19492 | } |
19493 | |
19494 | #[simd_test(enable = "avx512fp16" )] |
19495 | unsafe fn test_mm512_maskz_conj_pch() { |
19496 | let a = _mm512_set1_pch(0.0, 1.0); |
19497 | let r = _mm512_maskz_conj_pch(0b0101010101010101, a); |
19498 | let e = _mm512_setr_ph( |
19499 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19500 | 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, |
19501 | ); |
19502 | assert_eq_m512h(r, e); |
19503 | } |
19504 | |
19505 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19506 | unsafe fn test_mm_fmadd_pch() { |
19507 | let a = _mm_set1_pch(0.0, 1.0); |
19508 | let b = _mm_set1_pch(0.0, 2.0); |
19509 | let c = _mm_set1_pch(0.0, 3.0); |
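// Complex fused multiply-add: (0+i)*(0+2i) + (0+3i) = -2+3i in every complex element.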
19510 | let r = _mm_fmadd_pch(a, b, c); |
19511 | let e = _mm_set1_pch(-2.0, 3.0); |
19512 | assert_eq_m128h(r, e); |
19513 | } |
19514 | |
19515 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19516 | unsafe fn test_mm_mask_fmadd_pch() { |
19517 | let a = _mm_set1_pch(0.0, 1.0); |
19518 | let b = _mm_set1_pch(0.0, 2.0); |
19519 | let c = _mm_set1_pch(0.0, 3.0); |
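// Cleared mask bits keep the corresponding complex number from a (0+i).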
19520 | let r = _mm_mask_fmadd_pch(a, 0b0101, b, c); |
19521 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0); |
19522 | assert_eq_m128h(r, e); |
19523 | } |
19524 | |
19525 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19526 | unsafe fn test_mm_mask3_fmadd_pch() { |
19527 | let a = _mm_set1_pch(0.0, 1.0); |
19528 | let b = _mm_set1_pch(0.0, 2.0); |
19529 | let c = _mm_set1_pch(0.0, 3.0); |
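// The mask3 form writes over c: cleared mask bits keep the complex number from c (0+3i).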
19530 | let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101); |
19531 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0); |
19532 | assert_eq_m128h(r, e); |
19533 | } |
19534 | |
19535 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19536 | unsafe fn test_mm_maskz_fmadd_pch() { |
19537 | let a = _mm_set1_pch(0.0, 1.0); |
19538 | let b = _mm_set1_pch(0.0, 2.0); |
19539 | let c = _mm_set1_pch(0.0, 3.0); |
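// Cleared mask bits zero the whole complex number.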
19540 | let r = _mm_maskz_fmadd_pch(0b0101, a, b, c); |
19541 | let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0); |
19542 | assert_eq_m128h(r, e); |
19543 | } |
19544 | |
19545 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19546 | unsafe fn test_mm256_fmadd_pch() { |
19547 | let a = _mm256_set1_pch(0.0, 1.0); |
19548 | let b = _mm256_set1_pch(0.0, 2.0); |
19549 | let c = _mm256_set1_pch(0.0, 3.0); |
19550 | let r = _mm256_fmadd_pch(a, b, c); |
19551 | let e = _mm256_set1_pch(-2.0, 3.0); |
19552 | assert_eq_m256h(r, e); |
19553 | } |
19554 | |
19555 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19556 | unsafe fn test_mm256_mask_fmadd_pch() { |
19557 | let a = _mm256_set1_pch(0.0, 1.0); |
19558 | let b = _mm256_set1_pch(0.0, 2.0); |
19559 | let c = _mm256_set1_pch(0.0, 3.0); |
19560 | let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c); |
19561 | let e = _mm256_setr_ph( |
19562 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19563 | ); |
19564 | assert_eq_m256h(r, e); |
19565 | } |
19566 | |
19567 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19568 | unsafe fn test_mm256_mask3_fmadd_pch() { |
19569 | let a = _mm256_set1_pch(0.0, 1.0); |
19570 | let b = _mm256_set1_pch(0.0, 2.0); |
19571 | let c = _mm256_set1_pch(0.0, 3.0); |
19572 | let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101); |
19573 | let e = _mm256_setr_ph( |
19574 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19575 | ); |
19576 | assert_eq_m256h(r, e); |
19577 | } |
19578 | |
19579 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19580 | unsafe fn test_mm256_maskz_fmadd_pch() { |
19581 | let a = _mm256_set1_pch(0.0, 1.0); |
19582 | let b = _mm256_set1_pch(0.0, 2.0); |
19583 | let c = _mm256_set1_pch(0.0, 3.0); |
19584 | let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c); |
19585 | let e = _mm256_setr_ph( |
19586 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19587 | ); |
19588 | assert_eq_m256h(r, e); |
19589 | } |
19590 | |
19591 | #[simd_test(enable = "avx512fp16" )] |
19592 | unsafe fn test_mm512_fmadd_pch() { |
19593 | let a = _mm512_set1_pch(0.0, 1.0); |
19594 | let b = _mm512_set1_pch(0.0, 2.0); |
19595 | let c = _mm512_set1_pch(0.0, 3.0); |
19596 | let r = _mm512_fmadd_pch(a, b, c); |
19597 | let e = _mm512_set1_pch(-2.0, 3.0); |
19598 | assert_eq_m512h(r, e); |
19599 | } |
19600 | |
19601 | #[simd_test(enable = "avx512fp16" )] |
19602 | unsafe fn test_mm512_mask_fmadd_pch() { |
19603 | let a = _mm512_set1_pch(0.0, 1.0); |
19604 | let b = _mm512_set1_pch(0.0, 2.0); |
19605 | let c = _mm512_set1_pch(0.0, 3.0); |
19606 | let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c); |
19607 | let e = _mm512_setr_ph( |
19608 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19609 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19610 | ); |
19611 | assert_eq_m512h(r, e); |
19612 | } |
19613 | |
19614 | #[simd_test(enable = "avx512fp16" )] |
19615 | unsafe fn test_mm512_mask3_fmadd_pch() { |
19616 | let a = _mm512_set1_pch(0.0, 1.0); |
19617 | let b = _mm512_set1_pch(0.0, 2.0); |
19618 | let c = _mm512_set1_pch(0.0, 3.0); |
19619 | let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101); |
19620 | let e = _mm512_setr_ph( |
19621 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19622 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19623 | ); |
19624 | assert_eq_m512h(r, e); |
19625 | } |
19626 | |
19627 | #[simd_test(enable = "avx512fp16" )] |
19628 | unsafe fn test_mm512_maskz_fmadd_pch() { |
19629 | let a = _mm512_set1_pch(0.0, 1.0); |
19630 | let b = _mm512_set1_pch(0.0, 2.0); |
19631 | let c = _mm512_set1_pch(0.0, 3.0); |
19632 | let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c); |
19633 | let e = _mm512_setr_ph( |
19634 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19635 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19636 | ); |
19637 | assert_eq_m512h(r, e); |
19638 | } |
19639 | |
19640 | #[simd_test(enable = "avx512fp16" )] |
19641 | unsafe fn test_mm512_fmadd_round_pch() { |
19642 | let a = _mm512_set1_pch(0.0, 1.0); |
19643 | let b = _mm512_set1_pch(0.0, 2.0); |
19644 | let c = _mm512_set1_pch(0.0, 3.0); |
19645 | let r = |
19646 | _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19647 | let e = _mm512_set1_pch(-2.0, 3.0); |
19648 | assert_eq_m512h(r, e); |
19649 | } |
19650 | |
19651 | #[simd_test(enable = "avx512fp16" )] |
19652 | unsafe fn test_mm512_mask_fmadd_round_pch() { |
19653 | let a = _mm512_set1_pch(0.0, 1.0); |
19654 | let b = _mm512_set1_pch(0.0, 2.0); |
19655 | let c = _mm512_set1_pch(0.0, 3.0); |
19656 | let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19657 | a, |
19658 | 0b0101010101010101, |
19659 | b, |
19660 | c, |
19661 | ); |
19662 | let e = _mm512_setr_ph( |
19663 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19664 | -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, |
19665 | ); |
19666 | assert_eq_m512h(r, e); |
19667 | } |
19668 | |
19669 | #[simd_test(enable = "avx512fp16" )] |
19670 | unsafe fn test_mm512_mask3_fmadd_round_pch() { |
19671 | let a = _mm512_set1_pch(0.0, 1.0); |
19672 | let b = _mm512_set1_pch(0.0, 2.0); |
19673 | let c = _mm512_set1_pch(0.0, 3.0); |
19674 | let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19675 | a, |
19676 | b, |
19677 | c, |
19678 | 0b0101010101010101, |
19679 | ); |
19680 | let e = _mm512_setr_ph( |
19681 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19682 | -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, |
19683 | ); |
19684 | assert_eq_m512h(r, e); |
19685 | } |
19686 | |
19687 | #[simd_test(enable = "avx512fp16" )] |
19688 | unsafe fn test_mm512_maskz_fmadd_round_pch() { |
19689 | let a = _mm512_set1_pch(0.0, 1.0); |
19690 | let b = _mm512_set1_pch(0.0, 2.0); |
19691 | let c = _mm512_set1_pch(0.0, 3.0); |
19692 | let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19693 | 0b0101010101010101, |
19694 | a, |
19695 | b, |
19696 | c, |
19697 | ); |
19698 | let e = _mm512_setr_ph( |
19699 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19700 | -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, |
19701 | ); |
19702 | assert_eq_m512h(r, e); |
19703 | } |
19704 | |
19705 | #[simd_test(enable = "avx512fp16" )] |
19706 | unsafe fn test_mm_fmadd_sch() { |
19707 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19708 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19709 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
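// Scalar complex variant: only the lowest pair is computed, (0+i)*(0+2i) + (0+3i) = -2+3i;
// elements 2..7 are copied from a.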
19710 | let r = _mm_fmadd_sch(a, b, c); |
19711 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19712 | assert_eq_m128h(r, e); |
19713 | } |
19714 | |
19715 | #[simd_test(enable = "avx512fp16" )] |
19716 | unsafe fn test_mm_mask_fmadd_sch() { |
19717 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19718 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19719 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19720 | let r = _mm_mask_fmadd_sch(a, 0, b, c); |
19721 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19722 | assert_eq_m128h(r, e); |
19723 | let r = _mm_mask_fmadd_sch(a, 1, b, c); |
19724 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19725 | assert_eq_m128h(r, e); |
19726 | } |
19727 | |
19728 | #[simd_test(enable = "avx512fp16" )] |
19729 | unsafe fn test_mm_mask3_fmadd_sch() { |
19730 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19731 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19732 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19733 | let r = _mm_mask3_fmadd_sch(a, b, c, 0); |
19734 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19735 | assert_eq_m128h(r, e); |
19736 | let r = _mm_mask3_fmadd_sch(a, b, c, 1); |
19737 | let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19738 | assert_eq_m128h(r, e); |
19739 | } |
19740 | |
19741 | #[simd_test(enable = "avx512fp16" )] |
19742 | unsafe fn test_mm_maskz_fmadd_sch() { |
19743 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19744 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19745 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19746 | let r = _mm_maskz_fmadd_sch(0, a, b, c); |
19747 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19748 | assert_eq_m128h(r, e); |
19749 | let r = _mm_maskz_fmadd_sch(1, a, b, c); |
19750 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19751 | assert_eq_m128h(r, e); |
19752 | } |
19753 | |
19754 | #[simd_test(enable = "avx512fp16" )] |
19755 | unsafe fn test_mm_fmadd_round_sch() { |
19756 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19757 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19758 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19759 | let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19760 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19761 | assert_eq_m128h(r, e); |
19762 | } |
19763 | |
19764 | #[simd_test(enable = "avx512fp16" )] |
19765 | unsafe fn test_mm_mask_fmadd_round_sch() { |
19766 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19767 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19768 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19769 | let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19770 | a, 0, b, c, |
19771 | ); |
19772 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19773 | assert_eq_m128h(r, e); |
19774 | let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19775 | a, 1, b, c, |
19776 | ); |
19777 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19778 | assert_eq_m128h(r, e); |
19779 | } |
19780 | |
19781 | #[simd_test(enable = "avx512fp16" )] |
19782 | unsafe fn test_mm_mask3_fmadd_round_sch() { |
19783 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19784 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19785 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19786 | let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19787 | a, b, c, 0, |
19788 | ); |
19789 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19790 | assert_eq_m128h(r, e); |
19791 | let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19792 | a, b, c, 1, |
19793 | ); |
19794 | let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19795 | assert_eq_m128h(r, e); |
19796 | } |
19797 | |
19798 | #[simd_test(enable = "avx512fp16" )] |
19799 | unsafe fn test_mm_maskz_fmadd_round_sch() { |
19800 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19801 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
19802 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
19803 | let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19804 | 0, a, b, c, |
19805 | ); |
19806 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19807 | assert_eq_m128h(r, e); |
19808 | let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19809 | 1, a, b, c, |
19810 | ); |
19811 | let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
19812 | assert_eq_m128h(r, e); |
19813 | } |
19814 | |
19815 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19816 | unsafe fn test_mm_fcmadd_pch() { |
19817 | let a = _mm_set1_pch(0.0, 1.0); |
19818 | let b = _mm_set1_pch(0.0, 2.0); |
19819 | let c = _mm_set1_pch(0.0, 3.0); |
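// fcmadd uses the conjugate of b: (0+i)*conj(0+2i) + (0+3i) = (0+i)*(0-2i) + (0+3i) = 2+3i.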
19820 | let r = _mm_fcmadd_pch(a, b, c); |
19821 | let e = _mm_set1_pch(2.0, 3.0); |
19822 | assert_eq_m128h(r, e); |
19823 | } |
19824 | |
19825 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19826 | unsafe fn test_mm_mask_fcmadd_pch() { |
19827 | let a = _mm_set1_pch(0.0, 1.0); |
19828 | let b = _mm_set1_pch(0.0, 2.0); |
19829 | let c = _mm_set1_pch(0.0, 3.0); |
19830 | let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c); |
19831 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0); |
19832 | assert_eq_m128h(r, e); |
19833 | } |
19834 | |
19835 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19836 | unsafe fn test_mm_mask3_fcmadd_pch() { |
19837 | let a = _mm_set1_pch(0.0, 1.0); |
19838 | let b = _mm_set1_pch(0.0, 2.0); |
19839 | let c = _mm_set1_pch(0.0, 3.0); |
19840 | let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101); |
19841 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0); |
19842 | assert_eq_m128h(r, e); |
19843 | } |
19844 | |
19845 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19846 | unsafe fn test_mm_maskz_fcmadd_pch() { |
19847 | let a = _mm_set1_pch(0.0, 1.0); |
19848 | let b = _mm_set1_pch(0.0, 2.0); |
19849 | let c = _mm_set1_pch(0.0, 3.0); |
19850 | let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c); |
19851 | let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0); |
19852 | assert_eq_m128h(r, e); |
19853 | } |
19854 | |
19855 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19856 | unsafe fn test_mm256_fcmadd_pch() { |
19857 | let a = _mm256_set1_pch(0.0, 1.0); |
19858 | let b = _mm256_set1_pch(0.0, 2.0); |
19859 | let c = _mm256_set1_pch(0.0, 3.0); |
19860 | let r = _mm256_fcmadd_pch(a, b, c); |
19861 | let e = _mm256_set1_pch(2.0, 3.0); |
19862 | assert_eq_m256h(r, e); |
19863 | } |
19864 | |
19865 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19866 | unsafe fn test_mm256_mask_fcmadd_pch() { |
19867 | let a = _mm256_set1_pch(0.0, 1.0); |
19868 | let b = _mm256_set1_pch(0.0, 2.0); |
19869 | let c = _mm256_set1_pch(0.0, 3.0); |
19870 | let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c); |
19871 | let e = _mm256_setr_ph( |
19872 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19873 | ); |
19874 | assert_eq_m256h(r, e); |
19875 | } |
19876 | |
19877 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19878 | unsafe fn test_mm256_mask3_fcmadd_pch() { |
19879 | let a = _mm256_set1_pch(0.0, 1.0); |
19880 | let b = _mm256_set1_pch(0.0, 2.0); |
19881 | let c = _mm256_set1_pch(0.0, 3.0); |
19882 | let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101); |
19883 | let e = _mm256_setr_ph( |
19884 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19885 | ); |
19886 | assert_eq_m256h(r, e); |
19887 | } |
19888 | |
19889 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
19890 | unsafe fn test_mm256_maskz_fcmadd_pch() { |
19891 | let a = _mm256_set1_pch(0.0, 1.0); |
19892 | let b = _mm256_set1_pch(0.0, 2.0); |
19893 | let c = _mm256_set1_pch(0.0, 3.0); |
19894 | let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c); |
19895 | let e = _mm256_setr_ph( |
19896 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
19897 | ); |
19898 | assert_eq_m256h(r, e); |
19899 | } |
19900 | |
19901 | #[simd_test(enable = "avx512fp16" )] |
19902 | unsafe fn test_mm512_fcmadd_pch() { |
19903 | let a = _mm512_set1_pch(0.0, 1.0); |
19904 | let b = _mm512_set1_pch(0.0, 2.0); |
19905 | let c = _mm512_set1_pch(0.0, 3.0); |
19906 | let r = _mm512_fcmadd_pch(a, b, c); |
19907 | let e = _mm512_set1_pch(2.0, 3.0); |
19908 | assert_eq_m512h(r, e); |
19909 | } |
19910 | |
19911 | #[simd_test(enable = "avx512fp16" )] |
19912 | unsafe fn test_mm512_mask_fcmadd_pch() { |
19913 | let a = _mm512_set1_pch(0.0, 1.0); |
19914 | let b = _mm512_set1_pch(0.0, 2.0); |
19915 | let c = _mm512_set1_pch(0.0, 3.0); |
19916 | let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c); |
19917 | let e = _mm512_setr_ph( |
19918 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, |
19919 | 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19920 | ); |
19921 | assert_eq_m512h(r, e); |
19922 | } |
19923 | |
19924 | #[simd_test(enable = "avx512fp16" )] |
19925 | unsafe fn test_mm512_mask3_fcmadd_pch() { |
19926 | let a = _mm512_set1_pch(0.0, 1.0); |
19927 | let b = _mm512_set1_pch(0.0, 2.0); |
19928 | let c = _mm512_set1_pch(0.0, 3.0); |
19929 | let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101); |
19930 | let e = _mm512_setr_ph( |
19931 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, |
19932 | 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19933 | ); |
19934 | assert_eq_m512h(r, e); |
19935 | } |
19936 | |
19937 | #[simd_test(enable = "avx512fp16" )] |
19938 | unsafe fn test_mm512_maskz_fcmadd_pch() { |
19939 | let a = _mm512_set1_pch(0.0, 1.0); |
19940 | let b = _mm512_set1_pch(0.0, 2.0); |
19941 | let c = _mm512_set1_pch(0.0, 3.0); |
19942 | let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c); |
19943 | let e = _mm512_setr_ph( |
19944 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, |
19945 | 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
19946 | ); |
19947 | assert_eq_m512h(r, e); |
19948 | } |
19949 | |
19950 | #[simd_test(enable = "avx512fp16" )] |
19951 | unsafe fn test_mm512_fcmadd_round_pch() { |
19952 | let a = _mm512_set1_pch(0.0, 1.0); |
19953 | let b = _mm512_set1_pch(0.0, 2.0); |
19954 | let c = _mm512_set1_pch(0.0, 3.0); |
19955 | let r = |
19956 | _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
19957 | let e = _mm512_set1_pch(2.0, 3.0); |
19958 | assert_eq_m512h(r, e); |
19959 | } |
19960 | |
19961 | #[simd_test(enable = "avx512fp16" )] |
19962 | unsafe fn test_mm512_mask_fcmadd_round_pch() { |
19963 | let a = _mm512_set1_pch(0.0, 1.0); |
19964 | let b = _mm512_set1_pch(0.0, 2.0); |
19965 | let c = _mm512_set1_pch(0.0, 3.0); |
19966 | let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19967 | a, |
19968 | 0b0101010101010101, |
19969 | b, |
19970 | c, |
19971 | ); |
19972 | let e = _mm512_setr_ph( |
19973 | 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, |
19974 | 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, |
19975 | ); |
19976 | assert_eq_m512h(r, e); |
19977 | } |
19978 | |
19979 | #[simd_test(enable = "avx512fp16" )] |
19980 | unsafe fn test_mm512_mask3_fcmadd_round_pch() { |
19981 | let a = _mm512_set1_pch(0.0, 1.0); |
19982 | let b = _mm512_set1_pch(0.0, 2.0); |
19983 | let c = _mm512_set1_pch(0.0, 3.0); |
19984 | let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
19985 | a, |
19986 | b, |
19987 | c, |
19988 | 0b0101010101010101, |
19989 | ); |
19990 | let e = _mm512_setr_ph( |
19991 | 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, |
19992 | 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, |
19993 | ); |
19994 | assert_eq_m512h(r, e); |
19995 | } |
19996 | |
19997 | #[simd_test(enable = "avx512fp16" )] |
19998 | unsafe fn test_mm512_maskz_fcmadd_round_pch() { |
19999 | let a = _mm512_set1_pch(0.0, 1.0); |
20000 | let b = _mm512_set1_pch(0.0, 2.0); |
20001 | let c = _mm512_set1_pch(0.0, 3.0); |
20002 | let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20003 | 0b0101010101010101, |
20004 | a, |
20005 | b, |
20006 | c, |
20007 | ); |
20008 | let e = _mm512_setr_ph( |
20009 | 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, |
20010 | 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, |
20011 | ); |
20012 | assert_eq_m512h(r, e); |
20013 | } |
20014 | |
20015 | #[simd_test(enable = "avx512fp16" )] |
20016 | unsafe fn test_mm_fcmadd_sch() { |
20017 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20018 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20019 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20020 | let r = _mm_fcmadd_sch(a, b, c); |
20021 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20022 | assert_eq_m128h(r, e); |
20023 | } |
20024 | |
20025 | #[simd_test(enable = "avx512fp16" )] |
20026 | unsafe fn test_mm_mask_fcmadd_sch() { |
20027 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20028 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20029 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20030 | let r = _mm_mask_fcmadd_sch(a, 0, b, c); |
20031 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20032 | assert_eq_m128h(r, e); |
20033 | let r = _mm_mask_fcmadd_sch(a, 1, b, c); |
20034 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20035 | assert_eq_m128h(r, e); |
20036 | } |
20037 | |
20038 | #[simd_test(enable = "avx512fp16" )] |
20039 | unsafe fn test_mm_mask3_fcmadd_sch() { |
20040 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20041 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20042 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20043 | let r = _mm_mask3_fcmadd_sch(a, b, c, 0); |
20044 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20045 | assert_eq_m128h(r, e); |
20046 | let r = _mm_mask3_fcmadd_sch(a, b, c, 1); |
20047 | let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20048 | assert_eq_m128h(r, e); |
20049 | } |
20050 | |
20051 | #[simd_test(enable = "avx512fp16" )] |
20052 | unsafe fn test_mm_maskz_fcmadd_sch() { |
20053 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20054 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20055 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20056 | let r = _mm_maskz_fcmadd_sch(0, a, b, c); |
20057 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20058 | assert_eq_m128h(r, e); |
20059 | let r = _mm_maskz_fcmadd_sch(1, a, b, c); |
20060 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20061 | assert_eq_m128h(r, e); |
20062 | } |
20063 | |
20064 | #[simd_test(enable = "avx512fp16" )] |
20065 | unsafe fn test_mm_fcmadd_round_sch() { |
20066 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20067 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20068 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20069 | let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20070 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20071 | assert_eq_m128h(r, e); |
20072 | } |
20073 | |
20074 | #[simd_test(enable = "avx512fp16" )] |
20075 | unsafe fn test_mm_mask_fcmadd_round_sch() { |
20076 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20077 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20078 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20079 | let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20080 | a, 0, b, c, |
20081 | ); |
20082 | let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20083 | assert_eq_m128h(r, e); |
20084 | let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20085 | a, 1, b, c, |
20086 | ); |
20087 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20088 | assert_eq_m128h(r, e); |
20089 | } |
20090 | |
20091 | #[simd_test(enable = "avx512fp16" )] |
20092 | unsafe fn test_mm_mask3_fcmadd_round_sch() { |
20093 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20094 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20095 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20096 | let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20097 | a, b, c, 0, |
20098 | ); |
20099 | let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20100 | assert_eq_m128h(r, e); |
20101 | let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20102 | a, b, c, 1, |
20103 | ); |
20104 | let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20105 | assert_eq_m128h(r, e); |
20106 | } |
20107 | |
20108 | #[simd_test(enable = "avx512fp16" )] |
20109 | unsafe fn test_mm_maskz_fcmadd_round_sch() { |
20110 | let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20111 | let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0); |
20112 | let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0); |
20113 | let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20114 | 0, a, b, c, |
20115 | ); |
20116 | let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20117 | assert_eq_m128h(r, e); |
20118 | let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20119 | 1, a, b, c, |
20120 | ); |
20121 | let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
20122 | assert_eq_m128h(r, e); |
20123 | } |
20124 | |
20125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20126 | unsafe fn test_mm_fmadd_ph() { |
20127 | let a = _mm_set1_ph(1.0); |
20128 | let b = _mm_set1_ph(2.0); |
20129 | let c = _mm_set1_ph(3.0); |
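// Real (element-wise) fused multiply-add: 1.0 * 2.0 + 3.0 = 5.0 in every lane.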
20130 | let r = _mm_fmadd_ph(a, b, c); |
20131 | let e = _mm_set1_ph(5.0); |
20132 | assert_eq_m128h(r, e); |
20133 | } |
20134 | |
20135 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20136 | unsafe fn test_mm_mask_fmadd_ph() { |
20137 | let a = _mm_set1_ph(1.0); |
20138 | let b = _mm_set1_ph(2.0); |
20139 | let c = _mm_set1_ph(3.0); |
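// For the real _ph form each mask bit covers a single f16 element (not a complex pair);
// cleared bits keep the element from a.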
20140 | let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c); |
20141 | let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0); |
20142 | assert_eq_m128h(r, e); |
20143 | } |
20144 | |
20145 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20146 | unsafe fn test_mm_mask3_fmadd_ph() { |
20147 | let a = _mm_set1_ph(1.0); |
20148 | let b = _mm_set1_ph(2.0); |
20149 | let c = _mm_set1_ph(3.0); |
20150 | let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101); |
20151 | let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0); |
20152 | assert_eq_m128h(r, e); |
20153 | } |
20154 | |
20155 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20156 | unsafe fn test_mm_maskz_fmadd_ph() { |
20157 | let a = _mm_set1_ph(1.0); |
20158 | let b = _mm_set1_ph(2.0); |
20159 | let c = _mm_set1_ph(3.0); |
20160 | let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c); |
20161 | let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0); |
20162 | assert_eq_m128h(r, e); |
20163 | } |
20164 | |
20165 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20166 | unsafe fn test_mm256_fmadd_ph() { |
20167 | let a = _mm256_set1_ph(1.0); |
20168 | let b = _mm256_set1_ph(2.0); |
20169 | let c = _mm256_set1_ph(3.0); |
20170 | let r = _mm256_fmadd_ph(a, b, c); |
20171 | let e = _mm256_set1_ph(5.0); |
20172 | assert_eq_m256h(r, e); |
20173 | } |
20174 | |
20175 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20176 | unsafe fn test_mm256_mask_fmadd_ph() { |
20177 | let a = _mm256_set1_ph(1.0); |
20178 | let b = _mm256_set1_ph(2.0); |
20179 | let c = _mm256_set1_ph(3.0); |
20180 | let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c); |
20181 | let e = _mm256_set_ph( |
20182 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20183 | ); |
20184 | assert_eq_m256h(r, e); |
20185 | } |
20186 | |
20187 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20188 | unsafe fn test_mm256_mask3_fmadd_ph() { |
20189 | let a = _mm256_set1_ph(1.0); |
20190 | let b = _mm256_set1_ph(2.0); |
20191 | let c = _mm256_set1_ph(3.0); |
20192 | let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101); |
20193 | let e = _mm256_set_ph( |
20194 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20195 | ); |
20196 | assert_eq_m256h(r, e); |
20197 | } |
20198 | |
20199 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20200 | unsafe fn test_mm256_maskz_fmadd_ph() { |
20201 | let a = _mm256_set1_ph(1.0); |
20202 | let b = _mm256_set1_ph(2.0); |
20203 | let c = _mm256_set1_ph(3.0); |
20204 | let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c); |
20205 | let e = _mm256_set_ph( |
20206 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20207 | ); |
20208 | assert_eq_m256h(r, e); |
20209 | } |
20210 | |
20211 | #[simd_test(enable = "avx512fp16" )] |
20212 | unsafe fn test_mm512_fmadd_ph() { |
20213 | let a = _mm512_set1_ph(1.0); |
20214 | let b = _mm512_set1_ph(2.0); |
20215 | let c = _mm512_set1_ph(3.0); |
20216 | let r = _mm512_fmadd_ph(a, b, c); |
20217 | let e = _mm512_set1_ph(5.0); |
20218 | assert_eq_m512h(r, e); |
20219 | } |
20220 | |
20221 | #[simd_test(enable = "avx512fp16" )] |
20222 | unsafe fn test_mm512_mask_fmadd_ph() { |
20223 | let a = _mm512_set1_ph(1.0); |
20224 | let b = _mm512_set1_ph(2.0); |
20225 | let c = _mm512_set1_ph(3.0); |
20226 | let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c); |
20227 | let e = _mm512_set_ph( |
20228 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, |
20229 | 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20230 | ); |
20231 | assert_eq_m512h(r, e); |
20232 | } |
20233 | |
20234 | #[simd_test(enable = "avx512fp16" )] |
20235 | unsafe fn test_mm512_mask3_fmadd_ph() { |
20236 | let a = _mm512_set1_ph(1.0); |
20237 | let b = _mm512_set1_ph(2.0); |
20238 | let c = _mm512_set1_ph(3.0); |
20239 | let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101); |
20240 | let e = _mm512_set_ph( |
20241 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, |
20242 | 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20243 | ); |
20244 | assert_eq_m512h(r, e); |
20245 | } |
20246 | |
20247 | #[simd_test(enable = "avx512fp16" )] |
20248 | unsafe fn test_mm512_maskz_fmadd_ph() { |
20249 | let a = _mm512_set1_ph(1.0); |
20250 | let b = _mm512_set1_ph(2.0); |
20251 | let c = _mm512_set1_ph(3.0); |
20252 | let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c); |
20253 | let e = _mm512_set_ph( |
20254 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, |
20255 | 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20256 | ); |
20257 | assert_eq_m512h(r, e); |
20258 | } |
20259 | |
20260 | #[simd_test(enable = "avx512fp16" )] |
20261 | unsafe fn test_mm512_fmadd_round_ph() { |
20262 | let a = _mm512_set1_ph(1.0); |
20263 | let b = _mm512_set1_ph(2.0); |
20264 | let c = _mm512_set1_ph(3.0); |
20265 | let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20266 | let e = _mm512_set1_ph(5.0); |
20267 | assert_eq_m512h(r, e); |
20268 | } |
20269 | |
20270 | #[simd_test(enable = "avx512fp16" )] |
20271 | unsafe fn test_mm512_mask_fmadd_round_ph() { |
20272 | let a = _mm512_set1_ph(1.0); |
20273 | let b = _mm512_set1_ph(2.0); |
20274 | let c = _mm512_set1_ph(3.0); |
20275 | let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20276 | a, |
20277 | 0b01010101010101010101010101010101, |
20278 | b, |
20279 | c, |
20280 | ); |
20281 | let e = _mm512_set_ph( |
20282 | 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, |
20283 | 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, |
20284 | ); |
20285 | assert_eq_m512h(r, e); |
20286 | } |
20287 | |
20288 | #[simd_test(enable = "avx512fp16" )] |
20289 | unsafe fn test_mm512_mask3_fmadd_round_ph() { |
20290 | let a = _mm512_set1_ph(1.0); |
20291 | let b = _mm512_set1_ph(2.0); |
20292 | let c = _mm512_set1_ph(3.0); |
20293 | let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20294 | a, |
20295 | b, |
20296 | c, |
20297 | 0b01010101010101010101010101010101, |
20298 | ); |
20299 | let e = _mm512_set_ph( |
20300 | 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, |
20301 | 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, |
20302 | ); |
20303 | assert_eq_m512h(r, e); |
20304 | } |
20305 | |
20306 | #[simd_test(enable = "avx512fp16" )] |
20307 | unsafe fn test_mm512_maskz_fmadd_round_ph() { |
20308 | let a = _mm512_set1_ph(1.0); |
20309 | let b = _mm512_set1_ph(2.0); |
20310 | let c = _mm512_set1_ph(3.0); |
20311 | let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20312 | 0b01010101010101010101010101010101, |
20313 | a, |
20314 | b, |
20315 | c, |
20316 | ); |
20317 | let e = _mm512_set_ph( |
20318 | 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, |
20319 | 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, |
20320 | ); |
20321 | assert_eq_m512h(r, e); |
20322 | } |
20323 | |
20324 | #[simd_test(enable = "avx512fp16" )] |
20325 | unsafe fn test_mm_fmadd_sh() { |
20326 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20327 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20328 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
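// Scalar variant: only element 0 is computed (1.0 * 2.0 + 3.0 = 5.0); elements 1..7 are copied from a.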
20329 | let r = _mm_fmadd_sh(a, b, c); |
20330 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20331 | assert_eq_m128h(r, e); |
20332 | } |
20333 | |
20334 | #[simd_test(enable = "avx512fp16" )] |
20335 | unsafe fn test_mm_mask_fmadd_sh() { |
20336 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20337 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20338 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20339 | let r = _mm_mask_fmadd_sh(a, 0, b, c); |
20340 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20341 | assert_eq_m128h(r, e); |
20342 | let r = _mm_mask_fmadd_sh(a, 1, b, c); |
20343 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20344 | assert_eq_m128h(r, e); |
20345 | } |
20346 | |
20347 | #[simd_test(enable = "avx512fp16" )] |
20348 | unsafe fn test_mm_mask3_fmadd_sh() { |
20349 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20350 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20351 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20352 | let r = _mm_mask3_fmadd_sh(a, b, c, 0); |
20353 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20354 | assert_eq_m128h(r, e); |
20355 | let r = _mm_mask3_fmadd_sh(a, b, c, 1); |
20356 | let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); |
20357 | assert_eq_m128h(r, e); |
20358 | } |
20359 | |
20360 | #[simd_test(enable = "avx512fp16" )] |
20361 | unsafe fn test_mm_maskz_fmadd_sh() { |
20362 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20363 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20364 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20365 | let r = _mm_maskz_fmadd_sh(0, a, b, c); |
20366 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20367 | assert_eq_m128h(r, e); |
20368 | let r = _mm_maskz_fmadd_sh(1, a, b, c); |
20369 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20370 | assert_eq_m128h(r, e); |
20371 | } |
20372 | |
20373 | #[simd_test(enable = "avx512fp16" )] |
20374 | unsafe fn test_mm_fmadd_round_sh() { |
20375 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20376 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20377 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20378 | let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20379 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20380 | assert_eq_m128h(r, e); |
20381 | } |
20382 | |
20383 | #[simd_test(enable = "avx512fp16" )] |
20384 | unsafe fn test_mm_mask_fmadd_round_sh() { |
20385 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20386 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20387 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20388 | let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20389 | a, 0, b, c, |
20390 | ); |
20391 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20392 | assert_eq_m128h(r, e); |
20393 | let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20394 | a, 1, b, c, |
20395 | ); |
20396 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20397 | assert_eq_m128h(r, e); |
20398 | } |
20399 | |
20400 | #[simd_test(enable = "avx512fp16" )] |
20401 | unsafe fn test_mm_mask3_fmadd_round_sh() { |
20402 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20403 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20404 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20405 | let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20406 | a, b, c, 0, |
20407 | ); |
20408 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20409 | assert_eq_m128h(r, e); |
20410 | let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20411 | a, b, c, 1, |
20412 | ); |
20413 | let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.); |
20414 | assert_eq_m128h(r, e); |
20415 | } |
20416 | |
20417 | #[simd_test(enable = "avx512fp16" )] |
20418 | unsafe fn test_mm_maskz_fmadd_round_sh() { |
20419 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20420 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20421 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20422 | let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20423 | 0, a, b, c, |
20424 | ); |
20425 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20426 | assert_eq_m128h(r, e); |
20427 | let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20428 | 1, a, b, c, |
20429 | ); |
20430 | let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.); |
20431 | assert_eq_m128h(r, e); |
20432 | } |
20433 | |
20434 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20435 | unsafe fn test_mm_fmsub_ph() { |
20436 | let a = _mm_set1_ph(1.0); |
20437 | let b = _mm_set1_ph(2.0); |
20438 | let c = _mm_set1_ph(3.0); |
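// Fused multiply-subtract: 1.0 * 2.0 - 3.0 = -1.0 in every lane.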
20439 | let r = _mm_fmsub_ph(a, b, c); |
20440 | let e = _mm_set1_ph(-1.0); |
20441 | assert_eq_m128h(r, e); |
20442 | } |
20443 | |
20444 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20445 | unsafe fn test_mm_mask_fmsub_ph() { |
20446 | let a = _mm_set1_ph(1.0); |
20447 | let b = _mm_set1_ph(2.0); |
20448 | let c = _mm_set1_ph(3.0); |
20449 | let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c); |
20450 | let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0); |
20451 | assert_eq_m128h(r, e); |
20452 | } |
20453 | |
20454 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20455 | unsafe fn test_mm_mask3_fmsub_ph() { |
20456 | let a = _mm_set1_ph(1.0); |
20457 | let b = _mm_set1_ph(2.0); |
20458 | let c = _mm_set1_ph(3.0); |
20459 | let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101); |
20460 | let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0); |
20461 | assert_eq_m128h(r, e); |
20462 | } |
20463 | |
20464 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20465 | unsafe fn test_mm_maskz_fmsub_ph() { |
20466 | let a = _mm_set1_ph(1.0); |
20467 | let b = _mm_set1_ph(2.0); |
20468 | let c = _mm_set1_ph(3.0); |
20469 | let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c); |
20470 | let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0); |
20471 | assert_eq_m128h(r, e); |
20472 | } |
20473 | |
20474 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20475 | unsafe fn test_mm256_fmsub_ph() { |
20476 | let a = _mm256_set1_ph(1.0); |
20477 | let b = _mm256_set1_ph(2.0); |
20478 | let c = _mm256_set1_ph(3.0); |
20479 | let r = _mm256_fmsub_ph(a, b, c); |
20480 | let e = _mm256_set1_ph(-1.0); |
20481 | assert_eq_m256h(r, e); |
20482 | } |
20483 | |
20484 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20485 | unsafe fn test_mm256_mask_fmsub_ph() { |
20486 | let a = _mm256_set1_ph(1.0); |
20487 | let b = _mm256_set1_ph(2.0); |
20488 | let c = _mm256_set1_ph(3.0); |
20489 | let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c); |
20490 | let e = _mm256_set_ph( |
20491 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20492 | ); |
20493 | assert_eq_m256h(r, e); |
20494 | } |
20495 | |
20496 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20497 | unsafe fn test_mm256_mask3_fmsub_ph() { |
20498 | let a = _mm256_set1_ph(1.0); |
20499 | let b = _mm256_set1_ph(2.0); |
20500 | let c = _mm256_set1_ph(3.0); |
20501 | let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101); |
20502 | let e = _mm256_set_ph( |
20503 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20504 | ); |
20505 | assert_eq_m256h(r, e); |
20506 | } |
20507 | |
20508 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20509 | unsafe fn test_mm256_maskz_fmsub_ph() { |
20510 | let a = _mm256_set1_ph(1.0); |
20511 | let b = _mm256_set1_ph(2.0); |
20512 | let c = _mm256_set1_ph(3.0); |
20513 | let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c); |
20514 | let e = _mm256_set_ph( |
20515 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20516 | ); |
20517 | assert_eq_m256h(r, e); |
20518 | } |
20519 | |
20520 | #[simd_test(enable = "avx512fp16" )] |
20521 | unsafe fn test_mm512_fmsub_ph() { |
20522 | let a = _mm512_set1_ph(1.0); |
20523 | let b = _mm512_set1_ph(2.0); |
20524 | let c = _mm512_set1_ph(3.0); |
20525 | let r = _mm512_fmsub_ph(a, b, c); |
20526 | let e = _mm512_set1_ph(-1.0); |
20527 | assert_eq_m512h(r, e); |
20528 | } |
20529 | |
20530 | #[simd_test(enable = "avx512fp16" )] |
20531 | unsafe fn test_mm512_mask_fmsub_ph() { |
20532 | let a = _mm512_set1_ph(1.0); |
20533 | let b = _mm512_set1_ph(2.0); |
20534 | let c = _mm512_set1_ph(3.0); |
20535 | let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c); |
20536 | let e = _mm512_set_ph( |
20537 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20538 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20539 | ); |
20540 | assert_eq_m512h(r, e); |
20541 | } |
20542 | |
20543 | #[simd_test(enable = "avx512fp16" )] |
20544 | unsafe fn test_mm512_mask3_fmsub_ph() { |
20545 | let a = _mm512_set1_ph(1.0); |
20546 | let b = _mm512_set1_ph(2.0); |
20547 | let c = _mm512_set1_ph(3.0); |
20548 | let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101); |
20549 | let e = _mm512_set_ph( |
20550 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20551 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20552 | ); |
20553 | assert_eq_m512h(r, e); |
20554 | } |
20555 | |
20556 | #[simd_test(enable = "avx512fp16" )] |
20557 | unsafe fn test_mm512_maskz_fmsub_ph() { |
20558 | let a = _mm512_set1_ph(1.0); |
20559 | let b = _mm512_set1_ph(2.0); |
20560 | let c = _mm512_set1_ph(3.0); |
20561 | let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c); |
20562 | let e = _mm512_set_ph( |
20563 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20564 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20565 | ); |
20566 | assert_eq_m512h(r, e); |
20567 | } |
20568 | |
20569 | #[simd_test(enable = "avx512fp16" )] |
20570 | unsafe fn test_mm512_fmsub_round_ph() { |
20571 | let a = _mm512_set1_ph(1.0); |
20572 | let b = _mm512_set1_ph(2.0); |
20573 | let c = _mm512_set1_ph(3.0); |
20574 | let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20575 | let e = _mm512_set1_ph(-1.0); |
20576 | assert_eq_m512h(r, e); |
20577 | } |
20578 | |
20579 | #[simd_test(enable = "avx512fp16" )] |
20580 | unsafe fn test_mm512_mask_fmsub_round_ph() { |
20581 | let a = _mm512_set1_ph(1.0); |
20582 | let b = _mm512_set1_ph(2.0); |
20583 | let c = _mm512_set1_ph(3.0); |
20584 | let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20585 | a, |
20586 | 0b01010101010101010101010101010101, |
20587 | b, |
20588 | c, |
20589 | ); |
20590 | let e = _mm512_set_ph( |
20591 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20592 | 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, |
20593 | ); |
20594 | assert_eq_m512h(r, e); |
20595 | } |
20596 | |
20597 | #[simd_test(enable = "avx512fp16" )] |
20598 | unsafe fn test_mm512_mask3_fmsub_round_ph() { |
20599 | let a = _mm512_set1_ph(1.0); |
20600 | let b = _mm512_set1_ph(2.0); |
20601 | let c = _mm512_set1_ph(3.0); |
20602 | let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20603 | a, |
20604 | b, |
20605 | c, |
20606 | 0b01010101010101010101010101010101, |
20607 | ); |
20608 | let e = _mm512_set_ph( |
20609 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20610 | 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, |
20611 | ); |
20612 | assert_eq_m512h(r, e); |
20613 | } |
20614 | |
20615 | #[simd_test(enable = "avx512fp16" )] |
20616 | unsafe fn test_mm512_maskz_fmsub_round_ph() { |
20617 | let a = _mm512_set1_ph(1.0); |
20618 | let b = _mm512_set1_ph(2.0); |
20619 | let c = _mm512_set1_ph(3.0); |
20620 | let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20621 | 0b01010101010101010101010101010101, |
20622 | a, |
20623 | b, |
20624 | c, |
20625 | ); |
20626 | let e = _mm512_set_ph( |
20627 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20628 | 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, |
20629 | ); |
20630 | assert_eq_m512h(r, e); |
20631 | } |
20632 | |
20633 | #[simd_test(enable = "avx512fp16" )] |
20634 | unsafe fn test_mm_fmsub_sh() { |
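| // Scalar form: only lane 0 computes a * b - c (-1.0); lanes 1..7 are copied from a. |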
20635 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20636 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20637 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20638 | let r = _mm_fmsub_sh(a, b, c); |
20639 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20640 | assert_eq_m128h(r, e); |
20641 | } |
20642 | |
20643 | #[simd_test(enable = "avx512fp16" )] |
20644 | unsafe fn test_mm_mask_fmsub_sh() { |
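| // Scalar mask form: bit 0 selects between a[0] (clear) and a[0] * b[0] - c[0] (set); |
| // the upper lanes always come from a. |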
20645 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20646 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20647 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20648 | let r = _mm_mask_fmsub_sh(a, 0, b, c); |
20649 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20650 | assert_eq_m128h(r, e); |
20651 | let r = _mm_mask_fmsub_sh(a, 1, b, c); |
20652 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20653 | assert_eq_m128h(r, e); |
20654 | } |
20655 | |
20656 | #[simd_test(enable = "avx512fp16" )] |
20657 | unsafe fn test_mm_mask3_fmsub_sh() { |
20658 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20659 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20660 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20661 | let r = _mm_mask3_fmsub_sh(a, b, c, 0); |
20662 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20663 | assert_eq_m128h(r, e); |
20664 | let r = _mm_mask3_fmsub_sh(a, b, c, 1); |
20665 | let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); |
20666 | assert_eq_m128h(r, e); |
20667 | } |
20668 | |
20669 | #[simd_test(enable = "avx512fp16" )] |
20670 | unsafe fn test_mm_maskz_fmsub_sh() { |
20671 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20672 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20673 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20674 | let r = _mm_maskz_fmsub_sh(0, a, b, c); |
20675 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20676 | assert_eq_m128h(r, e); |
20677 | let r = _mm_maskz_fmsub_sh(1, a, b, c); |
20678 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20679 | assert_eq_m128h(r, e); |
20680 | } |
20681 | |
20682 | #[simd_test(enable = "avx512fp16" )] |
20683 | unsafe fn test_mm_fmsub_round_sh() { |
20684 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20685 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20686 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20687 | let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20688 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20689 | assert_eq_m128h(r, e); |
20690 | } |
20691 | |
20692 | #[simd_test(enable = "avx512fp16" )] |
20693 | unsafe fn test_mm_mask_fmsub_round_sh() { |
20694 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20695 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20696 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20697 | let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20698 | a, 0, b, c, |
20699 | ); |
20700 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20701 | assert_eq_m128h(r, e); |
20702 | let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20703 | a, 1, b, c, |
20704 | ); |
20705 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20706 | assert_eq_m128h(r, e); |
20707 | } |
20708 | |
20709 | #[simd_test(enable = "avx512fp16" )] |
20710 | unsafe fn test_mm_mask3_fmsub_round_sh() { |
20711 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20712 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20713 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20714 | let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20715 | a, b, c, 0, |
20716 | ); |
20717 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20718 | assert_eq_m128h(r, e); |
20719 | let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20720 | a, b, c, 1, |
20721 | ); |
20722 | let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.); |
20723 | assert_eq_m128h(r, e); |
20724 | } |
20725 | |
20726 | #[simd_test(enable = "avx512fp16" )] |
20727 | unsafe fn test_mm_maskz_fmsub_round_sh() { |
20728 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20729 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20730 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20731 | let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20732 | 0, a, b, c, |
20733 | ); |
20734 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20735 | assert_eq_m128h(r, e); |
20736 | let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20737 | 1, a, b, c, |
20738 | ); |
20739 | let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.); |
20740 | assert_eq_m128h(r, e); |
20741 | } |
20742 | |
20743 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20744 | unsafe fn test_mm_fnmadd_ph() { |
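| // FNMADD computes -(a * b) + c in every lane: -(1.0 * 2.0) + 3.0 = 1.0. |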
20745 | let a = _mm_set1_ph(1.0); |
20746 | let b = _mm_set1_ph(2.0); |
20747 | let c = _mm_set1_ph(3.0); |
20748 | let r = _mm_fnmadd_ph(a, b, c); |
20749 | let e = _mm_set1_ph(1.0); |
20750 | assert_eq_m128h(r, e); |
20751 | } |
20752 | |
20753 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20754 | unsafe fn test_mm_mask_fnmadd_ph() { |
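| // Here the fused result (1.0) happens to equal the pass-through value from a, so the |
| // expected vector is uniform even though only half the lanes are selected. |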
20755 | let a = _mm_set1_ph(1.0); |
20756 | let b = _mm_set1_ph(2.0); |
20757 | let c = _mm_set1_ph(3.0); |
20758 | let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c); |
20759 | let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0); |
20760 | assert_eq_m128h(r, e); |
20761 | } |
20762 | |
20763 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20764 | unsafe fn test_mm_mask3_fnmadd_ph() { |
20765 | let a = _mm_set1_ph(1.0); |
20766 | let b = _mm_set1_ph(2.0); |
20767 | let c = _mm_set1_ph(3.0); |
20768 | let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101); |
20769 | let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); |
20770 | assert_eq_m128h(r, e); |
20771 | } |
20772 | |
20773 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20774 | unsafe fn test_mm_maskz_fnmadd_ph() { |
20775 | let a = _mm_set1_ph(1.0); |
20776 | let b = _mm_set1_ph(2.0); |
20777 | let c = _mm_set1_ph(3.0); |
20778 | let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c); |
20779 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
20780 | assert_eq_m128h(r, e); |
20781 | } |
20782 | |
20783 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20784 | unsafe fn test_mm256_fnmadd_ph() { |
20785 | let a = _mm256_set1_ph(1.0); |
20786 | let b = _mm256_set1_ph(2.0); |
20787 | let c = _mm256_set1_ph(3.0); |
20788 | let r = _mm256_fnmadd_ph(a, b, c); |
20789 | let e = _mm256_set1_ph(1.0); |
20790 | assert_eq_m256h(r, e); |
20791 | } |
20792 | |
20793 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20794 | unsafe fn test_mm256_mask_fnmadd_ph() { |
20795 | let a = _mm256_set1_ph(1.0); |
20796 | let b = _mm256_set1_ph(2.0); |
20797 | let c = _mm256_set1_ph(3.0); |
20798 | let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c); |
20799 | let e = _mm256_set_ph( |
20800 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20801 | ); |
20802 | assert_eq_m256h(r, e); |
20803 | } |
20804 | |
20805 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20806 | unsafe fn test_mm256_mask3_fnmadd_ph() { |
20807 | let a = _mm256_set1_ph(1.0); |
20808 | let b = _mm256_set1_ph(2.0); |
20809 | let c = _mm256_set1_ph(3.0); |
20810 | let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101); |
20811 | let e = _mm256_set_ph( |
20812 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20813 | ); |
20814 | assert_eq_m256h(r, e); |
20815 | } |
20816 | |
20817 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
20818 | unsafe fn test_mm256_maskz_fnmadd_ph() { |
20819 | let a = _mm256_set1_ph(1.0); |
20820 | let b = _mm256_set1_ph(2.0); |
20821 | let c = _mm256_set1_ph(3.0); |
20822 | let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c); |
20823 | let e = _mm256_set_ph( |
20824 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20825 | ); |
20826 | assert_eq_m256h(r, e); |
20827 | } |
20828 | |
20829 | #[simd_test(enable = "avx512fp16" )] |
20830 | unsafe fn test_mm512_fnmadd_ph() { |
20831 | let a = _mm512_set1_ph(1.0); |
20832 | let b = _mm512_set1_ph(2.0); |
20833 | let c = _mm512_set1_ph(3.0); |
20834 | let r = _mm512_fnmadd_ph(a, b, c); |
20835 | let e = _mm512_set1_ph(1.0); |
20836 | assert_eq_m512h(r, e); |
20837 | } |
20838 | |
20839 | #[simd_test(enable = "avx512fp16" )] |
20840 | unsafe fn test_mm512_mask_fnmadd_ph() { |
20841 | let a = _mm512_set1_ph(1.0); |
20842 | let b = _mm512_set1_ph(2.0); |
20843 | let c = _mm512_set1_ph(3.0); |
20844 | let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c); |
20845 | let e = _mm512_set_ph( |
20846 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20847 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20848 | ); |
20849 | assert_eq_m512h(r, e); |
20850 | } |
20851 | |
20852 | #[simd_test(enable = "avx512fp16" )] |
20853 | unsafe fn test_mm512_mask3_fnmadd_ph() { |
20854 | let a = _mm512_set1_ph(1.0); |
20855 | let b = _mm512_set1_ph(2.0); |
20856 | let c = _mm512_set1_ph(3.0); |
20857 | let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101); |
20858 | let e = _mm512_set_ph( |
20859 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
20860 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20861 | ); |
20862 | assert_eq_m512h(r, e); |
20863 | } |
20864 | |
20865 | #[simd_test(enable = "avx512fp16" )] |
20866 | unsafe fn test_mm512_maskz_fnmadd_ph() { |
20867 | let a = _mm512_set1_ph(1.0); |
20868 | let b = _mm512_set1_ph(2.0); |
20869 | let c = _mm512_set1_ph(3.0); |
20870 | let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c); |
20871 | let e = _mm512_set_ph( |
20872 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
20873 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20874 | ); |
20875 | assert_eq_m512h(r, e); |
20876 | } |
20877 | |
20878 | #[simd_test(enable = "avx512fp16" )] |
20879 | unsafe fn test_mm512_fnmadd_round_ph() { |
20880 | let a = _mm512_set1_ph(1.0); |
20881 | let b = _mm512_set1_ph(2.0); |
20882 | let c = _mm512_set1_ph(3.0); |
20883 | let r = |
20884 | _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20885 | let e = _mm512_set1_ph(1.0); |
20886 | assert_eq_m512h(r, e); |
20887 | } |
20888 | |
20889 | #[simd_test(enable = "avx512fp16" )] |
20890 | unsafe fn test_mm512_mask_fnmadd_round_ph() { |
20891 | let a = _mm512_set1_ph(1.0); |
20892 | let b = _mm512_set1_ph(2.0); |
20893 | let c = _mm512_set1_ph(3.0); |
20894 | let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20895 | a, |
20896 | 0b01010101010101010101010101010101, |
20897 | b, |
20898 | c, |
20899 | ); |
20900 | let e = _mm512_set_ph( |
20901 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20902 | 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, |
20903 | ); |
20904 | assert_eq_m512h(r, e); |
20905 | } |
20906 | |
20907 | #[simd_test(enable = "avx512fp16" )] |
20908 | unsafe fn test_mm512_mask3_fnmadd_round_ph() { |
20909 | let a = _mm512_set1_ph(1.0); |
20910 | let b = _mm512_set1_ph(2.0); |
20911 | let c = _mm512_set1_ph(3.0); |
20912 | let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20913 | a, |
20914 | b, |
20915 | c, |
20916 | 0b01010101010101010101010101010101, |
20917 | ); |
20918 | let e = _mm512_set_ph( |
20919 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
20920 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
20921 | ); |
20922 | assert_eq_m512h(r, e); |
20923 | } |
20924 | |
20925 | #[simd_test(enable = "avx512fp16" )] |
20926 | unsafe fn test_mm512_maskz_fnmadd_round_ph() { |
20927 | let a = _mm512_set1_ph(1.0); |
20928 | let b = _mm512_set1_ph(2.0); |
20929 | let c = _mm512_set1_ph(3.0); |
20930 | let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
20931 | 0b01010101010101010101010101010101, |
20932 | a, |
20933 | b, |
20934 | c, |
20935 | ); |
20936 | let e = _mm512_set_ph( |
20937 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
20938 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
20939 | ); |
20940 | assert_eq_m512h(r, e); |
20941 | } |
20942 | |
20943 | #[simd_test(enable = "avx512fp16" )] |
20944 | unsafe fn test_mm_fnmadd_sh() { |
20945 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20946 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20947 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20948 | let r = _mm_fnmadd_sh(a, b, c); |
20949 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20950 | assert_eq_m128h(r, e); |
20951 | } |
20952 | |
20953 | #[simd_test(enable = "avx512fp16" )] |
20954 | unsafe fn test_mm_mask_fnmadd_sh() { |
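| // -(1.0 * 2.0) + 3.0 = 1.0 equals a[0], so both mask values produce the same lane 0. |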
20955 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20956 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20957 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20958 | let r = _mm_mask_fnmadd_sh(a, 0, b, c); |
20959 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20960 | assert_eq_m128h(r, e); |
20961 | let r = _mm_mask_fnmadd_sh(a, 1, b, c); |
20962 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20963 | assert_eq_m128h(r, e); |
20964 | } |
20965 | |
20966 | #[simd_test(enable = "avx512fp16" )] |
20967 | unsafe fn test_mm_mask3_fnmadd_sh() { |
20968 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20969 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20970 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20971 | let r = _mm_mask3_fnmadd_sh(a, b, c, 0); |
20972 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20973 | assert_eq_m128h(r, e); |
20974 | let r = _mm_mask3_fnmadd_sh(a, b, c, 1); |
20975 | let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); |
20976 | assert_eq_m128h(r, e); |
20977 | } |
20978 | |
20979 | #[simd_test(enable = "avx512fp16" )] |
20980 | unsafe fn test_mm_maskz_fnmadd_sh() { |
20981 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20982 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20983 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20984 | let r = _mm_maskz_fnmadd_sh(0, a, b, c); |
20985 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
20986 | assert_eq_m128h(r, e); |
20987 | let r = _mm_maskz_fnmadd_sh(1, a, b, c); |
20988 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20989 | assert_eq_m128h(r, e); |
20990 | } |
20991 | |
20992 | #[simd_test(enable = "avx512fp16" )] |
20993 | unsafe fn test_mm_fnmadd_round_sh() { |
20994 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20995 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
20996 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
20997 | let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
20998 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
20999 | assert_eq_m128h(r, e); |
21000 | } |
21001 | |
21002 | #[simd_test(enable = "avx512fp16" )] |
21003 | unsafe fn test_mm_mask_fnmadd_round_sh() { |
21004 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21005 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21006 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21007 | let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21008 | a, 0, b, c, |
21009 | ); |
21010 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21011 | assert_eq_m128h(r, e); |
21012 | let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21013 | a, 1, b, c, |
21014 | ); |
21015 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21016 | assert_eq_m128h(r, e); |
21017 | } |
21018 | |
21019 | #[simd_test(enable = "avx512fp16" )] |
21020 | unsafe fn test_mm_mask3_fnmadd_round_sh() { |
21021 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21022 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21023 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21024 | let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21025 | a, b, c, 0, |
21026 | ); |
21027 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21028 | assert_eq_m128h(r, e); |
21029 | let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21030 | a, b, c, 1, |
21031 | ); |
21032 | let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.); |
21033 | assert_eq_m128h(r, e); |
21034 | } |
21035 | |
21036 | #[simd_test(enable = "avx512fp16" )] |
21037 | unsafe fn test_mm_maskz_fnmadd_round_sh() { |
21038 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21039 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21040 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21041 | let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21042 | 0, a, b, c, |
21043 | ); |
21044 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21045 | assert_eq_m128h(r, e); |
21046 | let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21047 | 1, a, b, c, |
21048 | ); |
21049 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21050 | assert_eq_m128h(r, e); |
21051 | } |
21052 | |
21053 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21054 | unsafe fn test_mm_fnmsub_ph() { |
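| // FNMSUB computes -(a * b) - c in every lane: -(1.0 * 2.0) - 3.0 = -5.0. |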
21055 | let a = _mm_set1_ph(1.0); |
21056 | let b = _mm_set1_ph(2.0); |
21057 | let c = _mm_set1_ph(3.0); |
21058 | let r = _mm_fnmsub_ph(a, b, c); |
21059 | let e = _mm_set1_ph(-5.0); |
21060 | assert_eq_m128h(r, e); |
21061 | } |
21062 | |
21063 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21064 | unsafe fn test_mm_mask_fnmsub_ph() { |
21065 | let a = _mm_set1_ph(1.0); |
21066 | let b = _mm_set1_ph(2.0); |
21067 | let c = _mm_set1_ph(3.0); |
21068 | let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c); |
21069 | let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0); |
21070 | assert_eq_m128h(r, e); |
21071 | } |
21072 | |
21073 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21074 | unsafe fn test_mm_mask3_fnmsub_ph() { |
21075 | let a = _mm_set1_ph(1.0); |
21076 | let b = _mm_set1_ph(2.0); |
21077 | let c = _mm_set1_ph(3.0); |
21078 | let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101); |
21079 | let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0); |
21080 | assert_eq_m128h(r, e); |
21081 | } |
21082 | |
21083 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21084 | unsafe fn test_mm_maskz_fnmsub_ph() { |
21085 | let a = _mm_set1_ph(1.0); |
21086 | let b = _mm_set1_ph(2.0); |
21087 | let c = _mm_set1_ph(3.0); |
21088 | let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c); |
21089 | let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0); |
21090 | assert_eq_m128h(r, e); |
21091 | } |
21092 | |
21093 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21094 | unsafe fn test_mm256_fnmsub_ph() { |
21095 | let a = _mm256_set1_ph(1.0); |
21096 | let b = _mm256_set1_ph(2.0); |
21097 | let c = _mm256_set1_ph(3.0); |
21098 | let r = _mm256_fnmsub_ph(a, b, c); |
21099 | let e = _mm256_set1_ph(-5.0); |
21100 | assert_eq_m256h(r, e); |
21101 | } |
21102 | |
21103 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21104 | unsafe fn test_mm256_mask_fnmsub_ph() { |
21105 | let a = _mm256_set1_ph(1.0); |
21106 | let b = _mm256_set1_ph(2.0); |
21107 | let c = _mm256_set1_ph(3.0); |
21108 | let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c); |
21109 | let e = _mm256_set_ph( |
21110 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21111 | ); |
21112 | assert_eq_m256h(r, e); |
21113 | } |
21114 | |
21115 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21116 | unsafe fn test_mm256_mask3_fnmsub_ph() { |
21117 | let a = _mm256_set1_ph(1.0); |
21118 | let b = _mm256_set1_ph(2.0); |
21119 | let c = _mm256_set1_ph(3.0); |
21120 | let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101); |
21121 | let e = _mm256_set_ph( |
21122 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21123 | ); |
21124 | assert_eq_m256h(r, e); |
21125 | } |
21126 | |
21127 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21128 | unsafe fn test_mm256_maskz_fnmsub_ph() { |
21129 | let a = _mm256_set1_ph(1.0); |
21130 | let b = _mm256_set1_ph(2.0); |
21131 | let c = _mm256_set1_ph(3.0); |
21132 | let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c); |
21133 | let e = _mm256_set_ph( |
21134 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21135 | ); |
21136 | assert_eq_m256h(r, e); |
21137 | } |
21138 | |
21139 | #[simd_test(enable = "avx512fp16" )] |
21140 | unsafe fn test_mm512_fnmsub_ph() { |
21141 | let a = _mm512_set1_ph(1.0); |
21142 | let b = _mm512_set1_ph(2.0); |
21143 | let c = _mm512_set1_ph(3.0); |
21144 | let r = _mm512_fnmsub_ph(a, b, c); |
21145 | let e = _mm512_set1_ph(-5.0); |
21146 | assert_eq_m512h(r, e); |
21147 | } |
21148 | |
21149 | #[simd_test(enable = "avx512fp16" )] |
21150 | unsafe fn test_mm512_mask_fnmsub_ph() { |
21151 | let a = _mm512_set1_ph(1.0); |
21152 | let b = _mm512_set1_ph(2.0); |
21153 | let c = _mm512_set1_ph(3.0); |
21154 | let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c); |
21155 | let e = _mm512_set_ph( |
21156 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21157 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21158 | ); |
21159 | assert_eq_m512h(r, e); |
21160 | } |
21161 | |
21162 | #[simd_test(enable = "avx512fp16" )] |
21163 | unsafe fn test_mm512_mask3_fnmsub_ph() { |
21164 | let a = _mm512_set1_ph(1.0); |
21165 | let b = _mm512_set1_ph(2.0); |
21166 | let c = _mm512_set1_ph(3.0); |
21167 | let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101); |
21168 | let e = _mm512_set_ph( |
21169 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21170 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21171 | ); |
21172 | assert_eq_m512h(r, e); |
21173 | } |
21174 | |
21175 | #[simd_test(enable = "avx512fp16" )] |
21176 | unsafe fn test_mm512_maskz_fnmsub_ph() { |
21177 | let a = _mm512_set1_ph(1.0); |
21178 | let b = _mm512_set1_ph(2.0); |
21179 | let c = _mm512_set1_ph(3.0); |
21180 | let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c); |
21181 | let e = _mm512_set_ph( |
21182 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21183 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21184 | ); |
21185 | assert_eq_m512h(r, e); |
21186 | } |
21187 | |
21188 | #[simd_test(enable = "avx512fp16" )] |
21189 | unsafe fn test_mm512_fnmsub_round_ph() { |
21190 | let a = _mm512_set1_ph(1.0); |
21191 | let b = _mm512_set1_ph(2.0); |
21192 | let c = _mm512_set1_ph(3.0); |
21193 | let r = |
21194 | _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21195 | let e = _mm512_set1_ph(-5.0); |
21196 | assert_eq_m512h(r, e); |
21197 | } |
21198 | |
21199 | #[simd_test(enable = "avx512fp16" )] |
21200 | unsafe fn test_mm512_mask_fnmsub_round_ph() { |
21201 | let a = _mm512_set1_ph(1.0); |
21202 | let b = _mm512_set1_ph(2.0); |
21203 | let c = _mm512_set1_ph(3.0); |
21204 | let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21205 | a, |
21206 | 0b01010101010101010101010101010101, |
21207 | b, |
21208 | c, |
21209 | ); |
21210 | let e = _mm512_set_ph( |
21211 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21212 | 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, |
21213 | ); |
21214 | assert_eq_m512h(r, e); |
21215 | } |
21216 | |
21217 | #[simd_test(enable = "avx512fp16" )] |
21218 | unsafe fn test_mm512_mask3_fnmsub_round_ph() { |
21219 | let a = _mm512_set1_ph(1.0); |
21220 | let b = _mm512_set1_ph(2.0); |
21221 | let c = _mm512_set1_ph(3.0); |
21222 | let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21223 | a, |
21224 | b, |
21225 | c, |
21226 | 0b01010101010101010101010101010101, |
21227 | ); |
21228 | let e = _mm512_set_ph( |
21229 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21230 | 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, |
21231 | ); |
21232 | assert_eq_m512h(r, e); |
21233 | } |
21234 | |
21235 | #[simd_test(enable = "avx512fp16" )] |
21236 | unsafe fn test_mm512_maskz_fnmsub_round_ph() { |
21237 | let a = _mm512_set1_ph(1.0); |
21238 | let b = _mm512_set1_ph(2.0); |
21239 | let c = _mm512_set1_ph(3.0); |
21240 | let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21241 | 0b01010101010101010101010101010101, |
21242 | a, |
21243 | b, |
21244 | c, |
21245 | ); |
21246 | let e = _mm512_set_ph( |
21247 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21248 | 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, |
21249 | ); |
21250 | assert_eq_m512h(r, e); |
21251 | } |
21252 | |
21253 | #[simd_test(enable = "avx512fp16" )] |
21254 | unsafe fn test_mm_fnmsub_sh() { |
21255 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21256 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21257 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21258 | let r = _mm_fnmsub_sh(a, b, c); |
21259 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21260 | assert_eq_m128h(r, e); |
21261 | } |
21262 | |
21263 | #[simd_test(enable = "avx512fp16" )] |
21264 | unsafe fn test_mm_mask_fnmsub_sh() { |
21265 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21266 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21267 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21268 | let r = _mm_mask_fnmsub_sh(a, 0, b, c); |
21269 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21270 | assert_eq_m128h(r, e); |
21271 | let r = _mm_mask_fnmsub_sh(a, 1, b, c); |
21272 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21273 | assert_eq_m128h(r, e); |
21274 | } |
21275 | |
21276 | #[simd_test(enable = "avx512fp16" )] |
21277 | unsafe fn test_mm_mask3_fnmsub_sh() { |
21278 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21279 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21280 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21281 | let r = _mm_mask3_fnmsub_sh(a, b, c, 0); |
21282 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21283 | assert_eq_m128h(r, e); |
21284 | let r = _mm_mask3_fnmsub_sh(a, b, c, 1); |
21285 | let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); |
21286 | assert_eq_m128h(r, e); |
21287 | } |
21288 | |
21289 | #[simd_test(enable = "avx512fp16" )] |
21290 | unsafe fn test_mm_maskz_fnmsub_sh() { |
21291 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21292 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21293 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21294 | let r = _mm_maskz_fnmsub_sh(0, a, b, c); |
21295 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21296 | assert_eq_m128h(r, e); |
21297 | let r = _mm_maskz_fnmsub_sh(1, a, b, c); |
21298 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21299 | assert_eq_m128h(r, e); |
21300 | } |
21301 | |
21302 | #[simd_test(enable = "avx512fp16" )] |
21303 | unsafe fn test_mm_fnmsub_round_sh() { |
21304 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21305 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21306 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21307 | let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21308 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21309 | assert_eq_m128h(r, e); |
21310 | } |
21311 | |
21312 | #[simd_test(enable = "avx512fp16" )] |
21313 | unsafe fn test_mm_mask_fnmsub_round_sh() { |
21314 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21315 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21316 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21317 | let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21318 | a, 0, b, c, |
21319 | ); |
21320 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21321 | assert_eq_m128h(r, e); |
21322 | let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21323 | a, 1, b, c, |
21324 | ); |
21325 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21326 | assert_eq_m128h(r, e); |
21327 | } |
21328 | |
21329 | #[simd_test(enable = "avx512fp16" )] |
21330 | unsafe fn test_mm_mask3_fnmsub_round_sh() { |
21331 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21332 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21333 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21334 | let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21335 | a, b, c, 0, |
21336 | ); |
21337 | let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21338 | assert_eq_m128h(r, e); |
21339 | let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21340 | a, b, c, 1, |
21341 | ); |
21342 | let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.); |
21343 | assert_eq_m128h(r, e); |
21344 | } |
21345 | |
21346 | #[simd_test(enable = "avx512fp16" )] |
21347 | unsafe fn test_mm_maskz_fnmsub_round_sh() { |
21348 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
21349 | let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.); |
21350 | let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
21351 | let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21352 | 0, a, b, c, |
21353 | ); |
21354 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
21355 | assert_eq_m128h(r, e); |
21356 | let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21357 | 1, a, b, c, |
21358 | ); |
21359 | let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.); |
21360 | assert_eq_m128h(r, e); |
21361 | } |
21362 | |
21363 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21364 | unsafe fn test_mm_fmaddsub_ph() { |
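| // FMADDSUB alternates per lane: even-indexed lanes get a * b - c = -1.0 and odd-indexed |
| // lanes get a * b + c = 5.0. The set_ph constructors list lanes from the highest index |
| // down, so the trailing -1.0 below is lane 0. |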
21365 | let a = _mm_set1_ph(1.0); |
21366 | let b = _mm_set1_ph(2.0); |
21367 | let c = _mm_set1_ph(3.0); |
21368 | let r = _mm_fmaddsub_ph(a, b, c); |
21369 | let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0); |
21370 | assert_eq_m128h(r, e); |
21371 | } |
21372 | |
21373 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21374 | unsafe fn test_mm_mask_fmaddsub_ph() { |
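| // The 0b00110011 mask selects lanes in adjacent pairs, so the selected lanes show both |
| // the subtract (-1.0) and the add (5.0) halves of the pattern. |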
21375 | let a = _mm_set1_ph(1.0); |
21376 | let b = _mm_set1_ph(2.0); |
21377 | let c = _mm_set1_ph(3.0); |
21378 | let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c); |
21379 | let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0); |
21380 | assert_eq_m128h(r, e); |
21381 | } |
21382 | |
21383 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21384 | unsafe fn test_mm_mask3_fmaddsub_ph() { |
21385 | let a = _mm_set1_ph(1.0); |
21386 | let b = _mm_set1_ph(2.0); |
21387 | let c = _mm_set1_ph(3.0); |
21388 | let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011); |
21389 | let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0); |
21390 | assert_eq_m128h(r, e); |
21391 | } |
21392 | |
21393 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21394 | unsafe fn test_mm_maskz_fmaddsub_ph() { |
21395 | let a = _mm_set1_ph(1.0); |
21396 | let b = _mm_set1_ph(2.0); |
21397 | let c = _mm_set1_ph(3.0); |
21398 | let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c); |
21399 | let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0); |
21400 | assert_eq_m128h(r, e); |
21401 | } |
21402 | |
21403 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21404 | unsafe fn test_mm256_fmaddsub_ph() { |
21405 | let a = _mm256_set1_ph(1.0); |
21406 | let b = _mm256_set1_ph(2.0); |
21407 | let c = _mm256_set1_ph(3.0); |
21408 | let r = _mm256_fmaddsub_ph(a, b, c); |
21409 | let e = _mm256_set_ph( |
21410 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21411 | ); |
21412 | assert_eq_m256h(r, e); |
21413 | } |
21414 | |
21415 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21416 | unsafe fn test_mm256_mask_fmaddsub_ph() { |
21417 | let a = _mm256_set1_ph(1.0); |
21418 | let b = _mm256_set1_ph(2.0); |
21419 | let c = _mm256_set1_ph(3.0); |
21420 | let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c); |
21421 | let e = _mm256_set_ph( |
21422 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21423 | ); |
21424 | assert_eq_m256h(r, e); |
21425 | } |
21426 | |
21427 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21428 | unsafe fn test_mm256_mask3_fmaddsub_ph() { |
21429 | let a = _mm256_set1_ph(1.0); |
21430 | let b = _mm256_set1_ph(2.0); |
21431 | let c = _mm256_set1_ph(3.0); |
21432 | let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011); |
21433 | let e = _mm256_set_ph( |
21434 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21435 | ); |
21436 | assert_eq_m256h(r, e); |
21437 | } |
21438 | |
21439 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21440 | unsafe fn test_mm256_maskz_fmaddsub_ph() { |
21441 | let a = _mm256_set1_ph(1.0); |
21442 | let b = _mm256_set1_ph(2.0); |
21443 | let c = _mm256_set1_ph(3.0); |
21444 | let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c); |
21445 | let e = _mm256_set_ph( |
21446 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21447 | ); |
21448 | assert_eq_m256h(r, e); |
21449 | } |
21450 | |
21451 | #[simd_test(enable = "avx512fp16" )] |
21452 | unsafe fn test_mm512_fmaddsub_ph() { |
21453 | let a = _mm512_set1_ph(1.0); |
21454 | let b = _mm512_set1_ph(2.0); |
21455 | let c = _mm512_set1_ph(3.0); |
21456 | let r = _mm512_fmaddsub_ph(a, b, c); |
21457 | let e = _mm512_set_ph( |
21458 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21459 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21460 | ); |
21461 | assert_eq_m512h(r, e); |
21462 | } |
21463 | |
21464 | #[simd_test(enable = "avx512fp16" )] |
21465 | unsafe fn test_mm512_mask_fmaddsub_ph() { |
21466 | let a = _mm512_set1_ph(1.0); |
21467 | let b = _mm512_set1_ph(2.0); |
21468 | let c = _mm512_set1_ph(3.0); |
21469 | let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c); |
21470 | let e = _mm512_set_ph( |
21471 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21472 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21473 | ); |
21474 | assert_eq_m512h(r, e); |
21475 | } |
21476 | |
21477 | #[simd_test(enable = "avx512fp16" )] |
21478 | unsafe fn test_mm512_mask3_fmaddsub_ph() { |
21479 | let a = _mm512_set1_ph(1.0); |
21480 | let b = _mm512_set1_ph(2.0); |
21481 | let c = _mm512_set1_ph(3.0); |
21482 | let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011); |
21483 | let e = _mm512_set_ph( |
21484 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21485 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21486 | ); |
21487 | assert_eq_m512h(r, e); |
21488 | } |
21489 | |
21490 | #[simd_test(enable = "avx512fp16" )] |
21491 | unsafe fn test_mm512_maskz_fmaddsub_ph() { |
21492 | let a = _mm512_set1_ph(1.0); |
21493 | let b = _mm512_set1_ph(2.0); |
21494 | let c = _mm512_set1_ph(3.0); |
21495 | let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c); |
21496 | let e = _mm512_set_ph( |
21497 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21498 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21499 | ); |
21500 | assert_eq_m512h(r, e); |
21501 | } |
21502 | |
21503 | #[simd_test(enable = "avx512fp16" )] |
21504 | unsafe fn test_mm512_fmaddsub_round_ph() { |
21505 | let a = _mm512_set1_ph(1.0); |
21506 | let b = _mm512_set1_ph(2.0); |
21507 | let c = _mm512_set1_ph(3.0); |
21508 | let r = |
21509 | _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21510 | let e = _mm512_set_ph( |
21511 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21512 | 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, |
21513 | ); |
21514 | assert_eq_m512h(r, e); |
21515 | } |
21516 | |
21517 | #[simd_test(enable = "avx512fp16" )] |
21518 | unsafe fn test_mm512_mask_fmaddsub_round_ph() { |
21519 | let a = _mm512_set1_ph(1.0); |
21520 | let b = _mm512_set1_ph(2.0); |
21521 | let c = _mm512_set1_ph(3.0); |
21522 | let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21523 | a, |
21524 | 0b00110011001100110011001100110011, |
21525 | b, |
21526 | c, |
21527 | ); |
21528 | let e = _mm512_set_ph( |
21529 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21530 | 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, |
21531 | ); |
21532 | assert_eq_m512h(r, e); |
21533 | } |
21534 | |
21535 | #[simd_test(enable = "avx512fp16" )] |
21536 | unsafe fn test_mm512_mask3_fmaddsub_round_ph() { |
21537 | let a = _mm512_set1_ph(1.0); |
21538 | let b = _mm512_set1_ph(2.0); |
21539 | let c = _mm512_set1_ph(3.0); |
21540 | let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21541 | a, |
21542 | b, |
21543 | c, |
21544 | 0b00110011001100110011001100110011, |
21545 | ); |
21546 | let e = _mm512_set_ph( |
21547 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21548 | 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, |
21549 | ); |
21550 | assert_eq_m512h(r, e); |
21551 | } |
21552 | |
21553 | #[simd_test(enable = "avx512fp16" )] |
21554 | unsafe fn test_mm512_maskz_fmaddsub_round_ph() { |
21555 | let a = _mm512_set1_ph(1.0); |
21556 | let b = _mm512_set1_ph(2.0); |
21557 | let c = _mm512_set1_ph(3.0); |
21558 | let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21559 | 0b00110011001100110011001100110011, |
21560 | a, |
21561 | b, |
21562 | c, |
21563 | ); |
21564 | let e = _mm512_set_ph( |
21565 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21566 | 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, |
21567 | ); |
21568 | assert_eq_m512h(r, e); |
21569 | } |
21570 | |
21571 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21572 | unsafe fn test_mm_fmsubadd_ph() { |
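| // FMSUBADD is the opposite alternation: even-indexed lanes get a * b + c = 5.0 and |
| // odd-indexed lanes get a * b - c = -1.0. |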
21573 | let a = _mm_set1_ph(1.0); |
21574 | let b = _mm_set1_ph(2.0); |
21575 | let c = _mm_set1_ph(3.0); |
21576 | let r = _mm_fmsubadd_ph(a, b, c); |
21577 | let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0); |
21578 | assert_eq_m128h(r, e); |
21579 | } |
21580 | |
21581 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21582 | unsafe fn test_mm_mask_fmsubadd_ph() { |
21583 | let a = _mm_set1_ph(1.0); |
21584 | let b = _mm_set1_ph(2.0); |
21585 | let c = _mm_set1_ph(3.0); |
21586 | let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c); |
21587 | let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0); |
21588 | assert_eq_m128h(r, e); |
21589 | } |
21590 | |
21591 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21592 | unsafe fn test_mm_mask3_fmsubadd_ph() { |
21593 | let a = _mm_set1_ph(1.0); |
21594 | let b = _mm_set1_ph(2.0); |
21595 | let c = _mm_set1_ph(3.0); |
21596 | let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011); |
21597 | let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0); |
21598 | assert_eq_m128h(r, e); |
21599 | } |
21600 | |
21601 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21602 | unsafe fn test_mm_maskz_fmsubadd_ph() { |
21603 | let a = _mm_set1_ph(1.0); |
21604 | let b = _mm_set1_ph(2.0); |
21605 | let c = _mm_set1_ph(3.0); |
21606 | let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c); |
21607 | let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0); |
21608 | assert_eq_m128h(r, e); |
21609 | } |
21610 | |
21611 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21612 | unsafe fn test_mm256_fmsubadd_ph() { |
21613 | let a = _mm256_set1_ph(1.0); |
21614 | let b = _mm256_set1_ph(2.0); |
21615 | let c = _mm256_set1_ph(3.0); |
21616 | let r = _mm256_fmsubadd_ph(a, b, c); |
21617 | let e = _mm256_set_ph( |
21618 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21619 | ); |
21620 | assert_eq_m256h(r, e); |
21621 | } |
21622 | |
21623 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21624 | unsafe fn test_mm256_mask_fmsubadd_ph() { |
21625 | let a = _mm256_set1_ph(1.0); |
21626 | let b = _mm256_set1_ph(2.0); |
21627 | let c = _mm256_set1_ph(3.0); |
21628 | let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c); |
21629 | let e = _mm256_set_ph( |
21630 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21631 | ); |
21632 | assert_eq_m256h(r, e); |
21633 | } |
21634 | |
21635 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21636 | unsafe fn test_mm256_mask3_fmsubadd_ph() { |
21637 | let a = _mm256_set1_ph(1.0); |
21638 | let b = _mm256_set1_ph(2.0); |
21639 | let c = _mm256_set1_ph(3.0); |
21640 | let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011); |
21641 | let e = _mm256_set_ph( |
21642 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21643 | ); |
21644 | assert_eq_m256h(r, e); |
21645 | } |
21646 | |
21647 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21648 | unsafe fn test_mm256_maskz_fmsubadd_ph() { |
21649 | let a = _mm256_set1_ph(1.0); |
21650 | let b = _mm256_set1_ph(2.0); |
21651 | let c = _mm256_set1_ph(3.0); |
21652 | let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c); |
21653 | let e = _mm256_set_ph( |
21654 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21655 | ); |
21656 | assert_eq_m256h(r, e); |
21657 | } |
21658 | |
21659 | #[simd_test(enable = "avx512fp16" )] |
21660 | unsafe fn test_mm512_fmsubadd_ph() { |
21661 | let a = _mm512_set1_ph(1.0); |
21662 | let b = _mm512_set1_ph(2.0); |
21663 | let c = _mm512_set1_ph(3.0); |
21664 | let r = _mm512_fmsubadd_ph(a, b, c); |
21665 | let e = _mm512_set_ph( |
21666 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21667 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21668 | ); |
21669 | assert_eq_m512h(r, e); |
21670 | } |
21671 | |
21672 | #[simd_test(enable = "avx512fp16" )] |
21673 | unsafe fn test_mm512_mask_fmsubadd_ph() { |
21674 | let a = _mm512_set1_ph(1.0); |
21675 | let b = _mm512_set1_ph(2.0); |
21676 | let c = _mm512_set1_ph(3.0); |
21677 | let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c); |
21678 | let e = _mm512_set_ph( |
21679 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21680 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21681 | ); |
21682 | assert_eq_m512h(r, e); |
21683 | } |
21684 | |
21685 | #[simd_test(enable = "avx512fp16" )] |
21686 | unsafe fn test_mm512_mask3_fmsubadd_ph() { |
21687 | let a = _mm512_set1_ph(1.0); |
21688 | let b = _mm512_set1_ph(2.0); |
21689 | let c = _mm512_set1_ph(3.0); |
21690 | let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011); |
21691 | let e = _mm512_set_ph( |
21692 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21693 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21694 | ); |
21695 | assert_eq_m512h(r, e); |
21696 | } |
21697 | |
21698 | #[simd_test(enable = "avx512fp16" )] |
21699 | unsafe fn test_mm512_maskz_fmsubadd_ph() { |
21700 | let a = _mm512_set1_ph(1.0); |
21701 | let b = _mm512_set1_ph(2.0); |
21702 | let c = _mm512_set1_ph(3.0); |
21703 | let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c); |
21704 | let e = _mm512_set_ph( |
21705 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21706 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21707 | ); |
21708 | assert_eq_m512h(r, e); |
21709 | } |
21710 | |
21711 | #[simd_test(enable = "avx512fp16" )] |
21712 | unsafe fn test_mm512_fmsubadd_round_ph() { |
21713 | let a = _mm512_set1_ph(1.0); |
21714 | let b = _mm512_set1_ph(2.0); |
21715 | let c = _mm512_set1_ph(3.0); |
21716 | let r = |
21717 | _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); |
21718 | let e = _mm512_set_ph( |
21719 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21720 | -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, |
21721 | ); |
21722 | assert_eq_m512h(r, e); |
21723 | } |
21724 | |
21725 | #[simd_test(enable = "avx512fp16" )] |
21726 | unsafe fn test_mm512_mask_fmsubadd_round_ph() { |
21727 | let a = _mm512_set1_ph(1.0); |
21728 | let b = _mm512_set1_ph(2.0); |
21729 | let c = _mm512_set1_ph(3.0); |
21730 | let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21731 | a, |
21732 | 0b00110011001100110011001100110011, |
21733 | b, |
21734 | c, |
21735 | ); |
21736 | let e = _mm512_set_ph( |
21737 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21738 | 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, |
21739 | ); |
21740 | assert_eq_m512h(r, e); |
21741 | } |
21742 | |
21743 | #[simd_test(enable = "avx512fp16" )] |
21744 | unsafe fn test_mm512_mask3_fmsubadd_round_ph() { |
21745 | let a = _mm512_set1_ph(1.0); |
21746 | let b = _mm512_set1_ph(2.0); |
21747 | let c = _mm512_set1_ph(3.0); |
21748 | let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21749 | a, |
21750 | b, |
21751 | c, |
21752 | 0b00110011001100110011001100110011, |
21753 | ); |
21754 | let e = _mm512_set_ph( |
21755 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21756 | 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, |
21757 | ); |
21758 | assert_eq_m512h(r, e); |
21759 | } |
21760 | |
21761 | #[simd_test(enable = "avx512fp16" )] |
21762 | unsafe fn test_mm512_maskz_fmsubadd_round_ph() { |
21763 | let a = _mm512_set1_ph(1.0); |
21764 | let b = _mm512_set1_ph(2.0); |
21765 | let c = _mm512_set1_ph(3.0); |
21766 | let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
21767 | 0b00110011001100110011001100110011, |
21768 | a, |
21769 | b, |
21770 | c, |
21771 | ); |
21772 | let e = _mm512_set_ph( |
21773 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21774 | 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, |
21775 | ); |
21776 | assert_eq_m512h(r, e); |
21777 | } |
21778 | |
21779 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21780 | unsafe fn test_mm_rcp_ph() { |
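| // RCP is an approximate reciprocal; for the power-of-two input 2.0 the approximation |
| // comes out as exactly 0.5, so an exact comparison is safe here. |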
21781 | let a = _mm_set1_ph(2.0); |
21782 | let r = _mm_rcp_ph(a); |
21783 | let e = _mm_set1_ph(0.5); |
21784 | assert_eq_m128h(r, e); |
21785 | } |
21786 | |
21787 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21788 | unsafe fn test_mm_mask_rcp_ph() { |
21789 | let a = _mm_set1_ph(2.0); |
21790 | let src = _mm_set1_ph(1.0); |
21791 | let r = _mm_mask_rcp_ph(src, 0b01010101, a); |
21792 | let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); |
21793 | assert_eq_m128h(r, e); |
21794 | } |
21795 | |
21796 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21797 | unsafe fn test_mm_maskz_rcp_ph() { |
21798 | let a = _mm_set1_ph(2.0); |
21799 | let r = _mm_maskz_rcp_ph(0b01010101, a); |
21800 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
21801 | assert_eq_m128h(r, e); |
21802 | } |
21803 | |
21804 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21805 | unsafe fn test_mm256_rcp_ph() { |
21806 | let a = _mm256_set1_ph(2.0); |
21807 | let r = _mm256_rcp_ph(a); |
21808 | let e = _mm256_set1_ph(0.5); |
21809 | assert_eq_m256h(r, e); |
21810 | } |
21811 | |
21812 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21813 | unsafe fn test_mm256_mask_rcp_ph() { |
21814 | let a = _mm256_set1_ph(2.0); |
21815 | let src = _mm256_set1_ph(1.0); |
21816 | let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a); |
21817 | let e = _mm256_set_ph( |
21818 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21819 | ); |
21820 | assert_eq_m256h(r, e); |
21821 | } |
21822 | |
21823 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21824 | unsafe fn test_mm256_maskz_rcp_ph() { |
21825 | let a = _mm256_set1_ph(2.0); |
21826 | let r = _mm256_maskz_rcp_ph(0b0101010101010101, a); |
21827 | let e = _mm256_set_ph( |
21828 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21829 | ); |
21830 | assert_eq_m256h(r, e); |
21831 | } |
21832 | |
21833 | #[simd_test(enable = "avx512fp16" )] |
21834 | unsafe fn test_mm512_rcp_ph() { |
21835 | let a = _mm512_set1_ph(2.0); |
21836 | let r = _mm512_rcp_ph(a); |
21837 | let e = _mm512_set1_ph(0.5); |
21838 | assert_eq_m512h(r, e); |
21839 | } |
21840 | |
21841 | #[simd_test(enable = "avx512fp16" )] |
21842 | unsafe fn test_mm512_mask_rcp_ph() { |
21843 | let a = _mm512_set1_ph(2.0); |
21844 | let src = _mm512_set1_ph(1.0); |
21845 | let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a); |
21846 | let e = _mm512_set_ph( |
21847 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, |
21848 | 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21849 | ); |
21850 | assert_eq_m512h(r, e); |
21851 | } |
21852 | |
21853 | #[simd_test(enable = "avx512fp16" )] |
21854 | unsafe fn test_mm512_maskz_rcp_ph() { |
21855 | let a = _mm512_set1_ph(2.0); |
21856 | let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a); |
21857 | let e = _mm512_set_ph( |
21858 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
21859 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21860 | ); |
21861 | assert_eq_m512h(r, e); |
21862 | } |
21863 | |
21864 | #[simd_test(enable = "avx512fp16" )] |
21865 | unsafe fn test_mm_rcp_sh() { |
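| // Scalar form: lane 0 holds the approximate reciprocal of b[0] (1.0 / 2.0 = 0.5); |
| // lanes 1..7 are copied from a. |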
21866 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21867 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21868 | let r = _mm_rcp_sh(a, b); |
21869 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21870 | assert_eq_m128h(r, e); |
21871 | } |
21872 | |
21873 | #[simd_test(enable = "avx512fp16" )] |
21874 | unsafe fn test_mm_mask_rcp_sh() { |
21875 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21876 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21877 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
21878 | let r = _mm_mask_rcp_sh(src, 0, a, b); |
21879 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21880 | assert_eq_m128h(r, e); |
21881 | let r = _mm_mask_rcp_sh(src, 1, a, b); |
21882 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21883 | assert_eq_m128h(r, e); |
21884 | } |
21885 | |
21886 | #[simd_test(enable = "avx512fp16" )] |
21887 | unsafe fn test_mm_maskz_rcp_sh() { |
21888 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21889 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
21890 | let r = _mm_maskz_rcp_sh(0, a, b); |
21891 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21892 | assert_eq_m128h(r, e); |
21893 | let r = _mm_maskz_rcp_sh(1, a, b); |
21894 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21895 | assert_eq_m128h(r, e); |
21896 | } |
21897 | |
21898 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21899 | unsafe fn test_mm_rsqrt_ph() { |
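| // RSQRT is an approximate reciprocal square root: 1.0 / sqrt(4.0) = 0.5 in every lane. |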
21900 | let a = _mm_set1_ph(4.0); |
21901 | let r = _mm_rsqrt_ph(a); |
21902 | let e = _mm_set1_ph(0.5); |
21903 | assert_eq_m128h(r, e); |
21904 | } |
21905 | |
21906 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21907 | unsafe fn test_mm_mask_rsqrt_ph() { |
21908 | let a = _mm_set1_ph(4.0); |
21909 | let src = _mm_set1_ph(1.0); |
21910 | let r = _mm_mask_rsqrt_ph(src, 0b01010101, a); |
21911 | let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5); |
21912 | assert_eq_m128h(r, e); |
21913 | } |
21914 | |
21915 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21916 | unsafe fn test_mm_maskz_rsqrt_ph() { |
21917 | let a = _mm_set1_ph(4.0); |
21918 | let r = _mm_maskz_rsqrt_ph(0b01010101, a); |
21919 | let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5); |
21920 | assert_eq_m128h(r, e); |
21921 | } |
21922 | |
21923 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21924 | unsafe fn test_mm256_rsqrt_ph() { |
21925 | let a = _mm256_set1_ph(4.0); |
21926 | let r = _mm256_rsqrt_ph(a); |
21927 | let e = _mm256_set1_ph(0.5); |
21928 | assert_eq_m256h(r, e); |
21929 | } |
21930 | |
21931 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21932 | unsafe fn test_mm256_mask_rsqrt_ph() { |
21933 | let a = _mm256_set1_ph(4.0); |
21934 | let src = _mm256_set1_ph(1.0); |
21935 | let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a); |
21936 | let e = _mm256_set_ph( |
21937 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21938 | ); |
21939 | assert_eq_m256h(r, e); |
21940 | } |
21941 | |
21942 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
21943 | unsafe fn test_mm256_maskz_rsqrt_ph() { |
21944 | let a = _mm256_set1_ph(4.0); |
21945 | let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a); |
21946 | let e = _mm256_set_ph( |
21947 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21948 | ); |
21949 | assert_eq_m256h(r, e); |
21950 | } |
21951 | |
21952 | #[simd_test(enable = "avx512fp16" )] |
21953 | unsafe fn test_mm512_rsqrt_ph() { |
21954 | let a = _mm512_set1_ph(4.0); |
21955 | let r = _mm512_rsqrt_ph(a); |
21956 | let e = _mm512_set1_ph(0.5); |
21957 | assert_eq_m512h(r, e); |
21958 | } |
21959 | |
21960 | #[simd_test(enable = "avx512fp16" )] |
21961 | unsafe fn test_mm512_mask_rsqrt_ph() { |
21962 | let a = _mm512_set1_ph(4.0); |
21963 | let src = _mm512_set1_ph(1.0); |
21964 | let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a); |
21965 | let e = _mm512_set_ph( |
21966 | 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, |
21967 | 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, |
21968 | ); |
21969 | assert_eq_m512h(r, e); |
21970 | } |
21971 | |
21972 | #[simd_test(enable = "avx512fp16" )] |
21973 | unsafe fn test_mm512_maskz_rsqrt_ph() { |
21974 | let a = _mm512_set1_ph(4.0); |
21975 | let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a); |
21976 | let e = _mm512_set_ph( |
21977 | 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, |
21978 | 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, |
21979 | ); |
21980 | assert_eq_m512h(r, e); |
21981 | } |
21982 | |
21983 | #[simd_test(enable = "avx512fp16" )] |
21984 | unsafe fn test_mm_rsqrt_sh() { |
21985 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21986 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21987 | let r = _mm_rsqrt_sh(a, b); |
21988 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21989 | assert_eq_m128h(r, e); |
21990 | } |
21991 | |
21992 | #[simd_test(enable = "avx512fp16" )] |
21993 | unsafe fn test_mm_mask_rsqrt_sh() { |
21994 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21995 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
21996 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
21997 | let r = _mm_mask_rsqrt_sh(src, 0, a, b); |
21998 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
21999 | assert_eq_m128h(r, e); |
22000 | let r = _mm_mask_rsqrt_sh(src, 1, a, b); |
22001 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22002 | assert_eq_m128h(r, e); |
22003 | } |
22004 | |
22005 | #[simd_test(enable = "avx512fp16" )] |
22006 | unsafe fn test_mm_maskz_rsqrt_sh() { |
22007 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22008 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22009 | let r = _mm_maskz_rsqrt_sh(0, a, b); |
22010 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22011 | assert_eq_m128h(r, e); |
22012 | let r = _mm_maskz_rsqrt_sh(1, a, b); |
22013 | let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22014 | assert_eq_m128h(r, e); |
22015 | } |
22016 | |
22017 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22018 | unsafe fn test_mm_sqrt_ph() { |
22019 | let a = _mm_set1_ph(4.0); |
22020 | let r = _mm_sqrt_ph(a); |
22021 | let e = _mm_set1_ph(2.0); |
22022 | assert_eq_m128h(r, e); |
22023 | } |
22024 | |
22025 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22026 | unsafe fn test_mm_mask_sqrt_ph() { |
22027 | let a = _mm_set1_ph(4.0); |
22028 | let src = _mm_set1_ph(1.0); |
22029 | let r = _mm_mask_sqrt_ph(src, 0b01010101, a); |
22030 | let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0); |
22031 | assert_eq_m128h(r, e); |
22032 | } |
22033 | |
22034 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22035 | unsafe fn test_mm_maskz_sqrt_ph() { |
22036 | let a = _mm_set1_ph(4.0); |
22037 | let r = _mm_maskz_sqrt_ph(0b01010101, a); |
22038 | let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); |
22039 | assert_eq_m128h(r, e); |
22040 | } |
22041 | |
22042 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22043 | unsafe fn test_mm256_sqrt_ph() { |
22044 | let a = _mm256_set1_ph(4.0); |
22045 | let r = _mm256_sqrt_ph(a); |
22046 | let e = _mm256_set1_ph(2.0); |
22047 | assert_eq_m256h(r, e); |
22048 | } |
22049 | |
22050 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22051 | unsafe fn test_mm256_mask_sqrt_ph() { |
22052 | let a = _mm256_set1_ph(4.0); |
22053 | let src = _mm256_set1_ph(1.0); |
22054 | let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a); |
22055 | let e = _mm256_set_ph( |
22056 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22057 | ); |
22058 | assert_eq_m256h(r, e); |
22059 | } |
22060 | |
22061 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22062 | unsafe fn test_mm256_maskz_sqrt_ph() { |
22063 | let a = _mm256_set1_ph(4.0); |
22064 | let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a); |
22065 | let e = _mm256_set_ph( |
22066 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22067 | ); |
22068 | assert_eq_m256h(r, e); |
22069 | } |
22070 | |
22071 | #[simd_test(enable = "avx512fp16" )] |
22072 | unsafe fn test_mm512_sqrt_ph() { |
22073 | let a = _mm512_set1_ph(4.0); |
22074 | let r = _mm512_sqrt_ph(a); |
22075 | let e = _mm512_set1_ph(2.0); |
22076 | assert_eq_m512h(r, e); |
22077 | } |
22078 | |
22079 | #[simd_test(enable = "avx512fp16" )] |
22080 | unsafe fn test_mm512_mask_sqrt_ph() { |
22081 | let a = _mm512_set1_ph(4.0); |
22082 | let src = _mm512_set1_ph(1.0); |
22083 | let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a); |
22084 | let e = _mm512_set_ph( |
22085 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
22086 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22087 | ); |
22088 | assert_eq_m512h(r, e); |
22089 | } |
22090 | |
22091 | #[simd_test(enable = "avx512fp16" )] |
22092 | unsafe fn test_mm512_maskz_sqrt_ph() { |
22093 | let a = _mm512_set1_ph(4.0); |
22094 | let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a); |
22095 | let e = _mm512_set_ph( |
22096 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22097 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22098 | ); |
22099 | assert_eq_m512h(r, e); |
22100 | } |
22101 | |
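// The *_round variants take the rounding behaviour as a const generic;
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even with
// floating-point exceptions suppressed.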
22102 | #[simd_test(enable = "avx512fp16" )] |
22103 | unsafe fn test_mm512_sqrt_round_ph() { |
22104 | let a = _mm512_set1_ph(4.0); |
22105 | let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
22106 | let e = _mm512_set1_ph(2.0); |
22107 | assert_eq_m512h(r, e); |
22108 | } |
22109 | |
22110 | #[simd_test(enable = "avx512fp16" )] |
22111 | unsafe fn test_mm512_mask_sqrt_round_ph() { |
22112 | let a = _mm512_set1_ph(4.0); |
22113 | let src = _mm512_set1_ph(1.0); |
22114 | let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22115 | src, |
22116 | 0b01010101010101010101010101010101, |
22117 | a, |
22118 | ); |
22119 | let e = _mm512_set_ph( |
22120 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
22121 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
22122 | ); |
22123 | assert_eq_m512h(r, e); |
22124 | } |
22125 | |
22126 | #[simd_test(enable = "avx512fp16" )] |
22127 | unsafe fn test_mm512_maskz_sqrt_round_ph() { |
22128 | let a = _mm512_set1_ph(4.0); |
22129 | let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22130 | 0b01010101010101010101010101010101, |
22131 | a, |
22132 | ); |
22133 | let e = _mm512_set_ph( |
22134 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22135 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22136 | ); |
22137 | assert_eq_m512h(r, e); |
22138 | } |
22139 | |
22140 | #[simd_test(enable = "avx512fp16" )] |
22141 | unsafe fn test_mm_sqrt_sh() { |
22142 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22143 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22144 | let r = _mm_sqrt_sh(a, b); |
22145 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22146 | assert_eq_m128h(r, e); |
22147 | } |
22148 | |
22149 | #[simd_test(enable = "avx512fp16" )] |
22150 | unsafe fn test_mm_mask_sqrt_sh() { |
22151 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22152 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22153 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22154 | let r = _mm_mask_sqrt_sh(src, 0, a, b); |
22155 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22156 | assert_eq_m128h(r, e); |
22157 | let r = _mm_mask_sqrt_sh(src, 1, a, b); |
22158 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22159 | assert_eq_m128h(r, e); |
22160 | } |
22161 | |
22162 | #[simd_test(enable = "avx512fp16" )] |
22163 | unsafe fn test_mm_maskz_sqrt_sh() { |
22164 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22165 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22166 | let r = _mm_maskz_sqrt_sh(0, a, b); |
22167 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22168 | assert_eq_m128h(r, e); |
22169 | let r = _mm_maskz_sqrt_sh(1, a, b); |
22170 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22171 | assert_eq_m128h(r, e); |
22172 | } |
22173 | |
22174 | #[simd_test(enable = "avx512fp16" )] |
22175 | unsafe fn test_mm_sqrt_round_sh() { |
22176 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22177 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22178 | let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22179 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22180 | assert_eq_m128h(r, e); |
22181 | } |
22182 | |
22183 | #[simd_test(enable = "avx512fp16" )] |
22184 | unsafe fn test_mm_mask_sqrt_round_sh() { |
22185 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22186 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22187 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22188 | let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22189 | src, 0, a, b, |
22190 | ); |
22191 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22192 | assert_eq_m128h(r, e); |
22193 | let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22194 | src, 1, a, b, |
22195 | ); |
22196 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22197 | assert_eq_m128h(r, e); |
22198 | } |
22199 | |
22200 | #[simd_test(enable = "avx512fp16" )] |
22201 | unsafe fn test_mm_maskz_sqrt_round_sh() { |
22202 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22203 | let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0); |
22204 | let r = |
22205 | _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22206 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22207 | assert_eq_m128h(r, e); |
22208 | let r = |
22209 | _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22210 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22211 | assert_eq_m128h(r, e); |
22212 | } |
22213 | |
22214 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22215 | unsafe fn test_mm_max_ph() { |
22216 | let a = _mm_set1_ph(2.0); |
22217 | let b = _mm_set1_ph(1.0); |
22218 | let r = _mm_max_ph(a, b); |
22219 | let e = _mm_set1_ph(2.0); |
22220 | assert_eq_m128h(r, e); |
22221 | } |
22222 | |
22223 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22224 | unsafe fn test_mm_mask_max_ph() { |
22225 | let a = _mm_set1_ph(2.0); |
22226 | let b = _mm_set1_ph(1.0); |
22227 | let src = _mm_set1_ph(3.0); |
22228 | let r = _mm_mask_max_ph(src, 0b01010101, a, b); |
22229 | let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0); |
22230 | assert_eq_m128h(r, e); |
22231 | } |
22232 | |
22233 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22234 | unsafe fn test_mm_maskz_max_ph() { |
22235 | let a = _mm_set1_ph(2.0); |
22236 | let b = _mm_set1_ph(1.0); |
22237 | let r = _mm_maskz_max_ph(0b01010101, a, b); |
22238 | let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0); |
22239 | assert_eq_m128h(r, e); |
22240 | } |
22241 | |
22242 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22243 | unsafe fn test_mm256_max_ph() { |
22244 | let a = _mm256_set1_ph(2.0); |
22245 | let b = _mm256_set1_ph(1.0); |
22246 | let r = _mm256_max_ph(a, b); |
22247 | let e = _mm256_set1_ph(2.0); |
22248 | assert_eq_m256h(r, e); |
22249 | } |
22250 | |
22251 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22252 | unsafe fn test_mm256_mask_max_ph() { |
22253 | let a = _mm256_set1_ph(2.0); |
22254 | let b = _mm256_set1_ph(1.0); |
22255 | let src = _mm256_set1_ph(3.0); |
22256 | let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b); |
22257 | let e = _mm256_set_ph( |
22258 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22259 | ); |
22260 | assert_eq_m256h(r, e); |
22261 | } |
22262 | |
22263 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22264 | unsafe fn test_mm256_maskz_max_ph() { |
22265 | let a = _mm256_set1_ph(2.0); |
22266 | let b = _mm256_set1_ph(1.0); |
22267 | let r = _mm256_maskz_max_ph(0b0101010101010101, a, b); |
22268 | let e = _mm256_set_ph( |
22269 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22270 | ); |
22271 | assert_eq_m256h(r, e); |
22272 | } |
22273 | |
22274 | #[simd_test(enable = "avx512fp16" )] |
22275 | unsafe fn test_mm512_max_ph() { |
22276 | let a = _mm512_set1_ph(2.0); |
22277 | let b = _mm512_set1_ph(1.0); |
22278 | let r = _mm512_max_ph(a, b); |
22279 | let e = _mm512_set1_ph(2.0); |
22280 | assert_eq_m512h(r, e); |
22281 | } |
22282 | |
22283 | #[simd_test(enable = "avx512fp16" )] |
22284 | unsafe fn test_mm512_mask_max_ph() { |
22285 | let a = _mm512_set1_ph(2.0); |
22286 | let b = _mm512_set1_ph(1.0); |
22287 | let src = _mm512_set1_ph(3.0); |
22288 | let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b); |
22289 | let e = _mm512_set_ph( |
22290 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, |
22291 | 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22292 | ); |
22293 | assert_eq_m512h(r, e); |
22294 | } |
22295 | |
22296 | #[simd_test(enable = "avx512fp16" )] |
22297 | unsafe fn test_mm512_maskz_max_ph() { |
22298 | let a = _mm512_set1_ph(2.0); |
22299 | let b = _mm512_set1_ph(1.0); |
22300 | let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b); |
22301 | let e = _mm512_set_ph( |
22302 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22303 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22304 | ); |
22305 | assert_eq_m512h(r, e); |
22306 | } |
22307 | |
22308 | #[simd_test(enable = "avx512fp16" )] |
22309 | unsafe fn test_mm512_max_round_ph() { |
22310 | let a = _mm512_set1_ph(2.0); |
22311 | let b = _mm512_set1_ph(1.0); |
22312 | let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22313 | let e = _mm512_set1_ph(2.0); |
22314 | assert_eq_m512h(r, e); |
22315 | } |
22316 | |
22317 | #[simd_test(enable = "avx512fp16" )] |
22318 | unsafe fn test_mm512_mask_max_round_ph() { |
22319 | let a = _mm512_set1_ph(2.0); |
22320 | let b = _mm512_set1_ph(1.0); |
22321 | let src = _mm512_set1_ph(3.0); |
22322 | let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22323 | src, |
22324 | 0b01010101010101010101010101010101, |
22325 | a, |
22326 | b, |
22327 | ); |
22328 | let e = _mm512_set_ph( |
22329 | 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, |
22330 | 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, |
22331 | ); |
22332 | assert_eq_m512h(r, e); |
22333 | } |
22334 | |
22335 | #[simd_test(enable = "avx512fp16" )] |
22336 | unsafe fn test_mm512_maskz_max_round_ph() { |
22337 | let a = _mm512_set1_ph(2.0); |
22338 | let b = _mm512_set1_ph(1.0); |
22339 | let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22340 | 0b01010101010101010101010101010101, |
22341 | a, |
22342 | b, |
22343 | ); |
22344 | let e = _mm512_set_ph( |
22345 | 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, |
22346 | 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, |
22347 | ); |
22348 | assert_eq_m512h(r, e); |
22349 | } |
22350 | |
22351 | #[simd_test(enable = "avx512fp16" )] |
22352 | unsafe fn test_mm_max_sh() { |
22353 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22354 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22355 | let r = _mm_max_sh(a, b); |
22356 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22357 | assert_eq_m128h(r, e); |
22358 | } |
22359 | |
22360 | #[simd_test(enable = "avx512fp16" )] |
22361 | unsafe fn test_mm_mask_max_sh() { |
22362 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22363 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22364 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22365 | let r = _mm_mask_max_sh(src, 0, a, b); |
22366 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22367 | assert_eq_m128h(r, e); |
22368 | let r = _mm_mask_max_sh(src, 1, a, b); |
22369 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22370 | assert_eq_m128h(r, e); |
22371 | } |
22372 | |
22373 | #[simd_test(enable = "avx512fp16" )] |
22374 | unsafe fn test_mm_maskz_max_sh() { |
22375 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22376 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22377 | let r = _mm_maskz_max_sh(0, a, b); |
22378 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22379 | assert_eq_m128h(r, e); |
22380 | let r = _mm_maskz_max_sh(1, a, b); |
22381 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22382 | assert_eq_m128h(r, e); |
22383 | } |
22384 | |
22385 | #[simd_test(enable = "avx512fp16" )] |
22386 | unsafe fn test_mm_max_round_sh() { |
22387 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22388 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22389 | let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22390 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22391 | assert_eq_m128h(r, e); |
22392 | } |
22393 | |
22394 | #[simd_test(enable = "avx512fp16" )] |
22395 | unsafe fn test_mm_mask_max_round_sh() { |
22396 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22397 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22398 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22399 | let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22400 | src, 0, a, b, |
22401 | ); |
22402 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22403 | assert_eq_m128h(r, e); |
22404 | let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22405 | src, 1, a, b, |
22406 | ); |
22407 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22408 | assert_eq_m128h(r, e); |
22409 | } |
22410 | |
22411 | #[simd_test(enable = "avx512fp16" )] |
22412 | unsafe fn test_mm_maskz_max_round_sh() { |
22413 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22414 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22415 | let r = |
22416 | _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22417 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22418 | assert_eq_m128h(r, e); |
22419 | let r = |
22420 | _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22421 | let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22422 | assert_eq_m128h(r, e); |
22423 | } |
22424 | |
22425 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22426 | unsafe fn test_mm_min_ph() { |
22427 | let a = _mm_set1_ph(2.0); |
22428 | let b = _mm_set1_ph(1.0); |
22429 | let r = _mm_min_ph(a, b); |
22430 | let e = _mm_set1_ph(1.0); |
22431 | assert_eq_m128h(r, e); |
22432 | } |
22433 | |
22434 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22435 | unsafe fn test_mm_mask_min_ph() { |
22436 | let a = _mm_set1_ph(2.0); |
22437 | let b = _mm_set1_ph(1.0); |
22438 | let src = _mm_set1_ph(3.0); |
22439 | let r = _mm_mask_min_ph(src, 0b01010101, a, b); |
22440 | let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0); |
22441 | assert_eq_m128h(r, e); |
22442 | } |
22443 | |
22444 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22445 | unsafe fn test_mm_maskz_min_ph() { |
22446 | let a = _mm_set1_ph(2.0); |
22447 | let b = _mm_set1_ph(1.0); |
22448 | let r = _mm_maskz_min_ph(0b01010101, a, b); |
22449 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
22450 | assert_eq_m128h(r, e); |
22451 | } |
22452 | |
22453 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22454 | unsafe fn test_mm256_min_ph() { |
22455 | let a = _mm256_set1_ph(2.0); |
22456 | let b = _mm256_set1_ph(1.0); |
22457 | let r = _mm256_min_ph(a, b); |
22458 | let e = _mm256_set1_ph(1.0); |
22459 | assert_eq_m256h(r, e); |
22460 | } |
22461 | |
22462 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22463 | unsafe fn test_mm256_mask_min_ph() { |
22464 | let a = _mm256_set1_ph(2.0); |
22465 | let b = _mm256_set1_ph(1.0); |
22466 | let src = _mm256_set1_ph(3.0); |
22467 | let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b); |
22468 | let e = _mm256_set_ph( |
22469 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22470 | ); |
22471 | assert_eq_m256h(r, e); |
22472 | } |
22473 | |
22474 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22475 | unsafe fn test_mm256_maskz_min_ph() { |
22476 | let a = _mm256_set1_ph(2.0); |
22477 | let b = _mm256_set1_ph(1.0); |
22478 | let r = _mm256_maskz_min_ph(0b0101010101010101, a, b); |
22479 | let e = _mm256_set_ph( |
22480 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22481 | ); |
22482 | assert_eq_m256h(r, e); |
22483 | } |
22484 | |
22485 | #[simd_test(enable = "avx512fp16" )] |
22486 | unsafe fn test_mm512_min_ph() { |
22487 | let a = _mm512_set1_ph(2.0); |
22488 | let b = _mm512_set1_ph(1.0); |
22489 | let r = _mm512_min_ph(a, b); |
22490 | let e = _mm512_set1_ph(1.0); |
22491 | assert_eq_m512h(r, e); |
22492 | } |
22493 | |
22494 | #[simd_test(enable = "avx512fp16" )] |
22495 | unsafe fn test_mm512_mask_min_ph() { |
22496 | let a = _mm512_set1_ph(2.0); |
22497 | let b = _mm512_set1_ph(1.0); |
22498 | let src = _mm512_set1_ph(3.0); |
22499 | let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b); |
22500 | let e = _mm512_set_ph( |
22501 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
22502 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22503 | ); |
22504 | assert_eq_m512h(r, e); |
22505 | } |
22506 | |
22507 | #[simd_test(enable = "avx512fp16" )] |
22508 | unsafe fn test_mm512_maskz_min_ph() { |
22509 | let a = _mm512_set1_ph(2.0); |
22510 | let b = _mm512_set1_ph(1.0); |
22511 | let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b); |
22512 | let e = _mm512_set_ph( |
22513 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22514 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22515 | ); |
22516 | assert_eq_m512h(r, e); |
22517 | } |
22518 | |
22519 | #[simd_test(enable = "avx512fp16" )] |
22520 | unsafe fn test_mm512_min_round_ph() { |
22521 | let a = _mm512_set1_ph(2.0); |
22522 | let b = _mm512_set1_ph(1.0); |
22523 | let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22524 | let e = _mm512_set1_ph(1.0); |
22525 | assert_eq_m512h(r, e); |
22526 | } |
22527 | |
22528 | #[simd_test(enable = "avx512fp16" )] |
22529 | unsafe fn test_mm512_mask_min_round_ph() { |
22530 | let a = _mm512_set1_ph(2.0); |
22531 | let b = _mm512_set1_ph(1.0); |
22532 | let src = _mm512_set1_ph(3.0); |
22533 | let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22534 | src, |
22535 | 0b01010101010101010101010101010101, |
22536 | a, |
22537 | b, |
22538 | ); |
22539 | let e = _mm512_set_ph( |
22540 | 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, |
22541 | 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, |
22542 | ); |
22543 | assert_eq_m512h(r, e); |
22544 | } |
22545 | |
22546 | #[simd_test(enable = "avx512fp16" )] |
22547 | unsafe fn test_mm512_maskz_min_round_ph() { |
22548 | let a = _mm512_set1_ph(2.0); |
22549 | let b = _mm512_set1_ph(1.0); |
22550 | let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22551 | 0b01010101010101010101010101010101, |
22552 | a, |
22553 | b, |
22554 | ); |
22555 | let e = _mm512_set_ph( |
22556 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22557 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22558 | ); |
22559 | assert_eq_m512h(r, e); |
22560 | } |
22561 | |
22562 | #[simd_test(enable = "avx512fp16" )] |
22563 | unsafe fn test_mm_min_sh() { |
22564 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22565 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22566 | let r = _mm_min_sh(a, b); |
22567 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22568 | assert_eq_m128h(r, e); |
22569 | } |
22570 | |
22571 | #[simd_test(enable = "avx512fp16" )] |
22572 | unsafe fn test_mm_mask_min_sh() { |
22573 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22574 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22575 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22576 | let r = _mm_mask_min_sh(src, 0, a, b); |
22577 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22578 | assert_eq_m128h(r, e); |
22579 | let r = _mm_mask_min_sh(src, 1, a, b); |
22580 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22581 | assert_eq_m128h(r, e); |
22582 | } |
22583 | |
22584 | #[simd_test(enable = "avx512fp16" )] |
22585 | unsafe fn test_mm_maskz_min_sh() { |
22586 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22587 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22588 | let r = _mm_maskz_min_sh(0, a, b); |
22589 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22590 | assert_eq_m128h(r, e); |
22591 | let r = _mm_maskz_min_sh(1, a, b); |
22592 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22593 | assert_eq_m128h(r, e); |
22594 | } |
22595 | |
22596 | #[simd_test(enable = "avx512fp16" )] |
22597 | unsafe fn test_mm_min_round_sh() { |
22598 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22599 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22600 | let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
22601 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22602 | assert_eq_m128h(r, e); |
22603 | } |
22604 | |
22605 | #[simd_test(enable = "avx512fp16" )] |
22606 | unsafe fn test_mm_mask_min_round_sh() { |
22607 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22608 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22609 | let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0); |
22610 | let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22611 | src, 0, a, b, |
22612 | ); |
22613 | let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22614 | assert_eq_m128h(r, e); |
22615 | let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
22616 | src, 1, a, b, |
22617 | ); |
22618 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22619 | assert_eq_m128h(r, e); |
22620 | } |
22621 | |
22622 | #[simd_test(enable = "avx512fp16" )] |
22623 | unsafe fn test_mm_maskz_min_round_sh() { |
22624 | let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22625 | let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0); |
22626 | let r = |
22627 | _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
22628 | let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22629 | assert_eq_m128h(r, e); |
22630 | let r = |
22631 | _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
22632 | let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
22633 | assert_eq_m128h(r, e); |
22634 | } |
22635 | |
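// getexp extracts the unbiased exponent, i.e. floor(log2(|x|)), as a half-precision value;
// for 3.0 that is 1.0.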
22636 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22637 | unsafe fn test_mm_getexp_ph() { |
22638 | let a = _mm_set1_ph(3.0); |
22639 | let r = _mm_getexp_ph(a); |
22640 | let e = _mm_set1_ph(1.0); |
22641 | assert_eq_m128h(r, e); |
22642 | } |
22643 | |
22644 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22645 | unsafe fn test_mm_mask_getexp_ph() { |
22646 | let a = _mm_set1_ph(3.0); |
22647 | let src = _mm_set1_ph(4.0); |
22648 | let r = _mm_mask_getexp_ph(src, 0b01010101, a); |
22649 | let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0); |
22650 | assert_eq_m128h(r, e); |
22651 | } |
22652 | |
22653 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22654 | unsafe fn test_mm_maskz_getexp_ph() { |
22655 | let a = _mm_set1_ph(3.0); |
22656 | let r = _mm_maskz_getexp_ph(0b01010101, a); |
22657 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
22658 | assert_eq_m128h(r, e); |
22659 | } |
22660 | |
22661 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22662 | unsafe fn test_mm256_getexp_ph() { |
22663 | let a = _mm256_set1_ph(3.0); |
22664 | let r = _mm256_getexp_ph(a); |
22665 | let e = _mm256_set1_ph(1.0); |
22666 | assert_eq_m256h(r, e); |
22667 | } |
22668 | |
22669 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22670 | unsafe fn test_mm256_mask_getexp_ph() { |
22671 | let a = _mm256_set1_ph(3.0); |
22672 | let src = _mm256_set1_ph(4.0); |
22673 | let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a); |
22674 | let e = _mm256_set_ph( |
22675 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22676 | ); |
22677 | assert_eq_m256h(r, e); |
22678 | } |
22679 | |
22680 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22681 | unsafe fn test_mm256_maskz_getexp_ph() { |
22682 | let a = _mm256_set1_ph(3.0); |
22683 | let r = _mm256_maskz_getexp_ph(0b0101010101010101, a); |
22684 | let e = _mm256_set_ph( |
22685 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22686 | ); |
22687 | assert_eq_m256h(r, e); |
22688 | } |
22689 | |
22690 | #[simd_test(enable = "avx512fp16" )] |
22691 | unsafe fn test_mm512_getexp_ph() { |
22692 | let a = _mm512_set1_ph(3.0); |
22693 | let r = _mm512_getexp_ph(a); |
22694 | let e = _mm512_set1_ph(1.0); |
22695 | assert_eq_m512h(r, e); |
22696 | } |
22697 | |
22698 | #[simd_test(enable = "avx512fp16" )] |
22699 | unsafe fn test_mm512_mask_getexp_ph() { |
22700 | let a = _mm512_set1_ph(3.0); |
22701 | let src = _mm512_set1_ph(4.0); |
22702 | let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a); |
22703 | let e = _mm512_set_ph( |
22704 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, |
22705 | 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22706 | ); |
22707 | assert_eq_m512h(r, e); |
22708 | } |
22709 | |
22710 | #[simd_test(enable = "avx512fp16" )] |
22711 | unsafe fn test_mm512_maskz_getexp_ph() { |
22712 | let a = _mm512_set1_ph(3.0); |
22713 | let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a); |
22714 | let e = _mm512_set_ph( |
22715 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22716 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22717 | ); |
22718 | assert_eq_m512h(r, e); |
22719 | } |
22720 | |
22721 | #[simd_test(enable = "avx512fp16" )] |
22722 | unsafe fn test_mm512_getexp_round_ph() { |
22723 | let a = _mm512_set1_ph(3.0); |
22724 | let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a); |
22725 | let e = _mm512_set1_ph(1.0); |
22726 | assert_eq_m512h(r, e); |
22727 | } |
22728 | |
22729 | #[simd_test(enable = "avx512fp16" )] |
22730 | unsafe fn test_mm512_mask_getexp_round_ph() { |
22731 | let a = _mm512_set1_ph(3.0); |
22732 | let src = _mm512_set1_ph(4.0); |
22733 | let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>( |
22734 | src, |
22735 | 0b01010101010101010101010101010101, |
22736 | a, |
22737 | ); |
22738 | let e = _mm512_set_ph( |
22739 | 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, |
22740 | 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, |
22741 | ); |
22742 | assert_eq_m512h(r, e); |
22743 | } |
22744 | |
22745 | #[simd_test(enable = "avx512fp16" )] |
22746 | unsafe fn test_mm512_maskz_getexp_round_ph() { |
22747 | let a = _mm512_set1_ph(3.0); |
22748 | let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>( |
22749 | 0b01010101010101010101010101010101, |
22750 | a, |
22751 | ); |
22752 | let e = _mm512_set_ph( |
22753 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
22754 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
22755 | ); |
22756 | assert_eq_m512h(r, e); |
22757 | } |
22758 | |
22759 | #[simd_test(enable = "avx512fp16" )] |
22760 | unsafe fn test_mm_getexp_sh() { |
22761 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22762 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22763 | let r = _mm_getexp_sh(a, b); |
22764 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22765 | assert_eq_m128h(r, e); |
22766 | } |
22767 | |
22768 | #[simd_test(enable = "avx512fp16" )] |
22769 | unsafe fn test_mm_mask_getexp_sh() { |
22770 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22771 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22772 | let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); |
22773 | let r = _mm_mask_getexp_sh(src, 0, a, b); |
22774 | let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22775 | assert_eq_m128h(r, e); |
22776 | let r = _mm_mask_getexp_sh(src, 1, a, b); |
22777 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22778 | assert_eq_m128h(r, e); |
22779 | } |
22780 | |
22781 | #[simd_test(enable = "avx512fp16" )] |
22782 | unsafe fn test_mm_maskz_getexp_sh() { |
22783 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22784 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22785 | let r = _mm_maskz_getexp_sh(0, a, b); |
22786 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22787 | assert_eq_m128h(r, e); |
22788 | let r = _mm_maskz_getexp_sh(1, a, b); |
22789 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22790 | assert_eq_m128h(r, e); |
22791 | } |
22792 | |
22793 | #[simd_test(enable = "avx512fp16" )] |
22794 | unsafe fn test_mm_getexp_round_sh() { |
22795 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22796 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22797 | let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b); |
22798 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22799 | assert_eq_m128h(r, e); |
22800 | } |
22801 | |
22802 | #[simd_test(enable = "avx512fp16" )] |
22803 | unsafe fn test_mm_mask_getexp_round_sh() { |
22804 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22805 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22806 | let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.); |
22807 | let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
22808 | let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22809 | assert_eq_m128h(r, e); |
22810 | let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
22811 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22812 | assert_eq_m128h(r, e); |
22813 | } |
22814 | |
22815 | #[simd_test(enable = "avx512fp16" )] |
22816 | unsafe fn test_mm_maskz_getexp_round_sh() { |
22817 | let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.); |
22818 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
22819 | let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b); |
22820 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22821 | assert_eq_m128h(r, e); |
22822 | let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b); |
22823 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
22824 | assert_eq_m128h(r, e); |
22825 | } |
22826 | |
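// getmant with _MM_MANT_NORM_P75_1P5 normalizes the mantissa into [0.75, 1.5):
// 10.0 = 1.25 * 2^3, so the expected mantissa is 1.25. The _MM_MANT_SIGN_NAN sign control
// only matters for negative inputs, which these tests do not exercise.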
22827 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22828 | unsafe fn test_mm_getmant_ph() { |
22829 | let a = _mm_set1_ph(10.0); |
22830 | let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22831 | let e = _mm_set1_ph(1.25); |
22832 | assert_eq_m128h(r, e); |
22833 | } |
22834 | |
22835 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22836 | unsafe fn test_mm_mask_getmant_ph() { |
22837 | let a = _mm_set1_ph(10.0); |
22838 | let src = _mm_set1_ph(20.0); |
22839 | let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a); |
22840 | let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25); |
22841 | assert_eq_m128h(r, e); |
22842 | } |
22843 | |
22844 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22845 | unsafe fn test_mm_maskz_getmant_ph() { |
22846 | let a = _mm_set1_ph(10.0); |
22847 | let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a); |
22848 | let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25); |
22849 | assert_eq_m128h(r, e); |
22850 | } |
22851 | |
22852 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22853 | unsafe fn test_mm256_getmant_ph() { |
22854 | let a = _mm256_set1_ph(10.0); |
22855 | let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22856 | let e = _mm256_set1_ph(1.25); |
22857 | assert_eq_m256h(r, e); |
22858 | } |
22859 | |
22860 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22861 | unsafe fn test_mm256_mask_getmant_ph() { |
22862 | let a = _mm256_set1_ph(10.0); |
22863 | let src = _mm256_set1_ph(20.0); |
22864 | let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22865 | src, |
22866 | 0b0101010101010101, |
22867 | a, |
22868 | ); |
22869 | let e = _mm256_set_ph( |
22870 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22871 | 20.0, 1.25, |
22872 | ); |
22873 | assert_eq_m256h(r, e); |
22874 | } |
22875 | |
22876 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
22877 | unsafe fn test_mm256_maskz_getmant_ph() { |
22878 | let a = _mm256_set1_ph(10.0); |
22879 | let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22880 | 0b0101010101010101, |
22881 | a, |
22882 | ); |
22883 | let e = _mm256_set_ph( |
22884 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22885 | ); |
22886 | assert_eq_m256h(r, e); |
22887 | } |
22888 | |
22889 | #[simd_test(enable = "avx512fp16" )] |
22890 | unsafe fn test_mm512_getmant_ph() { |
22891 | let a = _mm512_set1_ph(10.0); |
22892 | let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a); |
22893 | let e = _mm512_set1_ph(1.25); |
22894 | assert_eq_m512h(r, e); |
22895 | } |
22896 | |
22897 | #[simd_test(enable = "avx512fp16" )] |
22898 | unsafe fn test_mm512_mask_getmant_ph() { |
22899 | let a = _mm512_set1_ph(10.0); |
22900 | let src = _mm512_set1_ph(20.0); |
22901 | let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22902 | src, |
22903 | 0b01010101010101010101010101010101, |
22904 | a, |
22905 | ); |
22906 | let e = _mm512_set_ph( |
22907 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22908 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22909 | 20.0, 1.25, 20.0, 1.25, |
22910 | ); |
22911 | assert_eq_m512h(r, e); |
22912 | } |
22913 | |
22914 | #[simd_test(enable = "avx512fp16" )] |
22915 | unsafe fn test_mm512_maskz_getmant_ph() { |
22916 | let a = _mm512_set1_ph(10.0); |
22917 | let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>( |
22918 | 0b01010101010101010101010101010101, |
22919 | a, |
22920 | ); |
22921 | let e = _mm512_set_ph( |
22922 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22923 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22924 | ); |
22925 | assert_eq_m512h(r, e); |
22926 | } |
22927 | |
22928 | #[simd_test(enable = "avx512fp16" )] |
22929 | unsafe fn test_mm512_getmant_round_ph() { |
22930 | let a = _mm512_set1_ph(10.0); |
22931 | let r = |
22932 | _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( |
22933 | a, |
22934 | ); |
22935 | let e = _mm512_set1_ph(1.25); |
22936 | assert_eq_m512h(r, e); |
22937 | } |
22938 | |
22939 | #[simd_test(enable = "avx512fp16" )] |
22940 | unsafe fn test_mm512_mask_getmant_round_ph() { |
22941 | let a = _mm512_set1_ph(10.0); |
22942 | let src = _mm512_set1_ph(20.0); |
22943 | let r = _mm512_mask_getmant_round_ph::< |
22944 | _MM_MANT_NORM_P75_1P5, |
22945 | _MM_MANT_SIGN_NAN, |
22946 | _MM_FROUND_NO_EXC, |
22947 | >(src, 0b01010101010101010101010101010101, a); |
22948 | let e = _mm512_set_ph( |
22949 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22950 | 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, |
22951 | 20.0, 1.25, 20.0, 1.25, |
22952 | ); |
22953 | assert_eq_m512h(r, e); |
22954 | } |
22955 | |
22956 | #[simd_test(enable = "avx512fp16" )] |
22957 | unsafe fn test_mm512_maskz_getmant_round_ph() { |
22958 | let a = _mm512_set1_ph(10.0); |
22959 | let r = _mm512_maskz_getmant_round_ph::< |
22960 | _MM_MANT_NORM_P75_1P5, |
22961 | _MM_MANT_SIGN_NAN, |
22962 | _MM_FROUND_NO_EXC, |
22963 | >(0b01010101010101010101010101010101, a); |
22964 | let e = _mm512_set_ph( |
22965 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22966 | 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, |
22967 | ); |
22968 | assert_eq_m512h(r, e); |
22969 | } |
22970 | |
22971 | #[simd_test(enable = "avx512fp16" )] |
22972 | unsafe fn test_mm_getmant_sh() { |
22973 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22974 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22975 | let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b); |
22976 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22977 | assert_eq_m128h(r, e); |
22978 | } |
22979 | |
22980 | #[simd_test(enable = "avx512fp16" )] |
22981 | unsafe fn test_mm_mask_getmant_sh() { |
22982 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22983 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22984 | let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); |
22985 | let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b); |
22986 | let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); |
22987 | assert_eq_m128h(r, e); |
22988 | let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b); |
22989 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
22990 | assert_eq_m128h(r, e); |
22991 | } |
22992 | |
22993 | #[simd_test(enable = "avx512fp16" )] |
22994 | unsafe fn test_mm_maskz_getmant_sh() { |
22995 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
22996 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
22997 | let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b); |
22998 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
22999 | assert_eq_m128h(r, e); |
23000 | let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b); |
23001 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23002 | assert_eq_m128h(r, e); |
23003 | } |
23004 | |
23005 | #[simd_test(enable = "avx512fp16" )] |
23006 | unsafe fn test_mm_getmant_round_sh() { |
23007 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23008 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23009 | let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>( |
23010 | a, b, |
23011 | ); |
23012 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23013 | assert_eq_m128h(r, e); |
23014 | } |
23015 | |
23016 | #[simd_test(enable = "avx512fp16" )] |
23017 | unsafe fn test_mm_mask_getmant_round_sh() { |
23018 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23019 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23020 | let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.); |
23021 | let r = _mm_mask_getmant_round_sh::< |
23022 | _MM_MANT_NORM_P75_1P5, |
23023 | _MM_MANT_SIGN_NAN, |
23024 | _MM_FROUND_NO_EXC, |
23025 | >(src, 0, a, b); |
23026 | let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.); |
23027 | assert_eq_m128h(r, e); |
23028 | let r = _mm_mask_getmant_round_sh::< |
23029 | _MM_MANT_NORM_P75_1P5, |
23030 | _MM_MANT_SIGN_NAN, |
23031 | _MM_FROUND_NO_EXC, |
23032 | >(src, 1, a, b); |
23033 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23034 | assert_eq_m128h(r, e); |
23035 | } |
23036 | |
23037 | #[simd_test(enable = "avx512fp16" )] |
23038 | unsafe fn test_mm_maskz_getmant_round_sh() { |
23039 | let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.); |
23040 | let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.); |
23041 | let r = _mm_maskz_getmant_round_sh::< |
23042 | _MM_MANT_NORM_P75_1P5, |
23043 | _MM_MANT_SIGN_NAN, |
23044 | _MM_FROUND_NO_EXC, |
23045 | >(0, a, b); |
23046 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23047 | assert_eq_m128h(r, e); |
23048 | let r = _mm_maskz_getmant_round_sh::< |
23049 | _MM_MANT_NORM_P75_1P5, |
23050 | _MM_MANT_SIGN_NAN, |
23051 | _MM_FROUND_NO_EXC, |
23052 | >(1, a, b); |
23053 | let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.); |
23054 | assert_eq_m128h(r, e); |
23055 | } |
23056 | |
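// roundscale with IMM8 = 0 keeps zero fraction bits and rounds to nearest, so 1.1 rounds to 1.0.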
23057 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23058 | unsafe fn test_mm_roundscale_ph() { |
23059 | let a = _mm_set1_ph(1.1); |
23060 | let r = _mm_roundscale_ph::<0>(a); |
23061 | let e = _mm_set1_ph(1.0); |
23062 | assert_eq_m128h(r, e); |
23063 | } |
23064 | |
23065 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23066 | unsafe fn test_mm_mask_roundscale_ph() { |
23067 | let a = _mm_set1_ph(1.1); |
23068 | let src = _mm_set1_ph(2.0); |
23069 | let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a); |
23070 | let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0); |
23071 | assert_eq_m128h(r, e); |
23072 | } |
23073 | |
23074 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23075 | unsafe fn test_mm_maskz_roundscale_ph() { |
23076 | let a = _mm_set1_ph(1.1); |
23077 | let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a); |
23078 | let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0); |
23079 | assert_eq_m128h(r, e); |
23080 | } |
23081 | |
23082 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23083 | unsafe fn test_mm256_roundscale_ph() { |
23084 | let a = _mm256_set1_ph(1.1); |
23085 | let r = _mm256_roundscale_ph::<0>(a); |
23086 | let e = _mm256_set1_ph(1.0); |
23087 | assert_eq_m256h(r, e); |
23088 | } |
23089 | |
23090 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23091 | unsafe fn test_mm256_mask_roundscale_ph() { |
23092 | let a = _mm256_set1_ph(1.1); |
23093 | let src = _mm256_set1_ph(2.0); |
23094 | let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a); |
23095 | let e = _mm256_set_ph( |
23096 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23097 | ); |
23098 | assert_eq_m256h(r, e); |
23099 | } |
23100 | |
23101 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23102 | unsafe fn test_mm256_maskz_roundscale_ph() { |
23103 | let a = _mm256_set1_ph(1.1); |
23104 | let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a); |
23105 | let e = _mm256_set_ph( |
23106 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23107 | ); |
23108 | assert_eq_m256h(r, e); |
23109 | } |
23110 | |
23111 | #[simd_test(enable = "avx512fp16" )] |
23112 | unsafe fn test_mm512_roundscale_ph() { |
23113 | let a = _mm512_set1_ph(1.1); |
23114 | let r = _mm512_roundscale_ph::<0>(a); |
23115 | let e = _mm512_set1_ph(1.0); |
23116 | assert_eq_m512h(r, e); |
23117 | } |
23118 | |
23119 | #[simd_test(enable = "avx512fp16" )] |
23120 | unsafe fn test_mm512_mask_roundscale_ph() { |
23121 | let a = _mm512_set1_ph(1.1); |
23122 | let src = _mm512_set1_ph(2.0); |
23123 | let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a); |
23124 | let e = _mm512_set_ph( |
23125 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
23126 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23127 | ); |
23128 | assert_eq_m512h(r, e); |
23129 | } |
23130 | |
23131 | #[simd_test(enable = "avx512fp16" )] |
23132 | unsafe fn test_mm512_maskz_roundscale_ph() { |
23133 | let a = _mm512_set1_ph(1.1); |
23134 | let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a); |
23135 | let e = _mm512_set_ph( |
23136 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
23137 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23138 | ); |
23139 | assert_eq_m512h(r, e); |
23140 | } |
23141 | |
23142 | #[simd_test(enable = "avx512fp16" )] |
23143 | unsafe fn test_mm512_roundscale_round_ph() { |
23144 | let a = _mm512_set1_ph(1.1); |
23145 | let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a); |
23146 | let e = _mm512_set1_ph(1.0); |
23147 | assert_eq_m512h(r, e); |
23148 | } |
23149 | |
23150 | #[simd_test(enable = "avx512fp16" )] |
23151 | unsafe fn test_mm512_mask_roundscale_round_ph() { |
23152 | let a = _mm512_set1_ph(1.1); |
23153 | let src = _mm512_set1_ph(2.0); |
23154 | let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( |
23155 | src, |
23156 | 0b01010101010101010101010101010101, |
23157 | a, |
23158 | ); |
23159 | let e = _mm512_set_ph( |
23160 | 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, |
23161 | 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, |
23162 | ); |
23163 | assert_eq_m512h(r, e); |
23164 | } |
23165 | |
23166 | #[simd_test(enable = "avx512fp16" )] |
23167 | unsafe fn test_mm512_maskz_roundscale_round_ph() { |
23168 | let a = _mm512_set1_ph(1.1); |
23169 | let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>( |
23170 | 0b01010101010101010101010101010101, |
23171 | a, |
23172 | ); |
23173 | let e = _mm512_set_ph( |
23174 | 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, |
23175 | 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, |
23176 | ); |
23177 | assert_eq_m512h(r, e); |
23178 | } |
23179 | |
23180 | #[simd_test(enable = "avx512fp16" )] |
23181 | unsafe fn test_mm_roundscale_sh() { |
23182 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23183 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23184 | let r = _mm_roundscale_sh::<0>(a, b); |
23185 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23186 | assert_eq_m128h(r, e); |
23187 | } |
23188 | |
23189 | #[simd_test(enable = "avx512fp16" )] |
23190 | unsafe fn test_mm_mask_roundscale_sh() { |
23191 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23192 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23193 | let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
23194 | let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b); |
23195 | let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23196 | assert_eq_m128h(r, e); |
23197 | let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b); |
23198 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23199 | assert_eq_m128h(r, e); |
23200 | } |
23201 | |
23202 | #[simd_test(enable = "avx512fp16" )] |
23203 | unsafe fn test_mm_maskz_roundscale_sh() { |
23204 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23205 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23206 | let r = _mm_maskz_roundscale_sh::<0>(0, a, b); |
23207 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23208 | assert_eq_m128h(r, e); |
23209 | let r = _mm_maskz_roundscale_sh::<0>(1, a, b); |
23210 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23211 | assert_eq_m128h(r, e); |
23212 | } |
23213 | |
23214 | #[simd_test(enable = "avx512fp16" )] |
23215 | unsafe fn test_mm_roundscale_round_sh() { |
23216 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23217 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23218 | let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b); |
23219 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23220 | assert_eq_m128h(r, e); |
23221 | } |
23222 | |
23223 | #[simd_test(enable = "avx512fp16" )] |
23224 | unsafe fn test_mm_mask_roundscale_round_sh() { |
23225 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23226 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23227 | let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.); |
23228 | let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b); |
23229 | let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23230 | assert_eq_m128h(r, e); |
23231 | let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b); |
23232 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23233 | assert_eq_m128h(r, e); |
23234 | } |
23235 | |
23236 | #[simd_test(enable = "avx512fp16" )] |
23237 | unsafe fn test_mm_maskz_roundscale_round_sh() { |
23238 | let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23239 | let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.); |
23240 | let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b); |
23241 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23242 | assert_eq_m128h(r, e); |
23243 | let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b); |
23244 | let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23245 | assert_eq_m128h(r, e); |
23246 | } |
23247 | |
23248 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23249 | unsafe fn test_mm_scalef_ph() { |
23250 | let a = _mm_set1_ph(1.); |
23251 | let b = _mm_set1_ph(3.); |
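// scalef computes a * 2^floor(b) per lane, so every lane becomes 1.0 * 2^3 = 8.0.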
23252 | let r = _mm_scalef_ph(a, b); |
23253 | let e = _mm_set1_ph(8.0); |
23254 | assert_eq_m128h(r, e); |
23255 | } |
23256 | |
23257 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23258 | unsafe fn test_mm_mask_scalef_ph() { |
23259 | let a = _mm_set1_ph(1.); |
23260 | let b = _mm_set1_ph(3.); |
23261 | let src = _mm_set1_ph(2.); |
23262 | let r = _mm_mask_scalef_ph(src, 0b01010101, a, b); |
23263 | let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0); |
23264 | assert_eq_m128h(r, e); |
23265 | } |
23266 | |
23267 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23268 | unsafe fn test_mm_maskz_scalef_ph() { |
23269 | let a = _mm_set1_ph(1.); |
23270 | let b = _mm_set1_ph(3.); |
23271 | let r = _mm_maskz_scalef_ph(0b01010101, a, b); |
23272 | let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0); |
23273 | assert_eq_m128h(r, e); |
23274 | } |
23275 | |
23276 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23277 | unsafe fn test_mm256_scalef_ph() { |
23278 | let a = _mm256_set1_ph(1.); |
23279 | let b = _mm256_set1_ph(3.); |
23280 | let r = _mm256_scalef_ph(a, b); |
23281 | let e = _mm256_set1_ph(8.0); |
23282 | assert_eq_m256h(r, e); |
23283 | } |
23284 | |
23285 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23286 | unsafe fn test_mm256_mask_scalef_ph() { |
23287 | let a = _mm256_set1_ph(1.); |
23288 | let b = _mm256_set1_ph(3.); |
23289 | let src = _mm256_set1_ph(2.); |
23290 | let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b); |
23291 | let e = _mm256_set_ph( |
23292 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23293 | ); |
23294 | assert_eq_m256h(r, e); |
23295 | } |
23296 | |
23297 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23298 | unsafe fn test_mm256_maskz_scalef_ph() { |
23299 | let a = _mm256_set1_ph(1.); |
23300 | let b = _mm256_set1_ph(3.); |
23301 | let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b); |
23302 | let e = _mm256_set_ph( |
23303 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23304 | ); |
23305 | assert_eq_m256h(r, e); |
23306 | } |
23307 | |
23308 | #[simd_test(enable = "avx512fp16" )] |
23309 | unsafe fn test_mm512_scalef_ph() { |
23310 | let a = _mm512_set1_ph(1.); |
23311 | let b = _mm512_set1_ph(3.); |
23312 | let r = _mm512_scalef_ph(a, b); |
23313 | let e = _mm512_set1_ph(8.0); |
23314 | assert_eq_m512h(r, e); |
23315 | } |
23316 | |
23317 | #[simd_test(enable = "avx512fp16" )] |
23318 | unsafe fn test_mm512_mask_scalef_ph() { |
23319 | let a = _mm512_set1_ph(1.); |
23320 | let b = _mm512_set1_ph(3.); |
23321 | let src = _mm512_set1_ph(2.); |
23322 | let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b); |
23323 | let e = _mm512_set_ph( |
23324 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, |
23325 | 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23326 | ); |
23327 | assert_eq_m512h(r, e); |
23328 | } |
23329 | |
23330 | #[simd_test(enable = "avx512fp16" )] |
23331 | unsafe fn test_mm512_maskz_scalef_ph() { |
23332 | let a = _mm512_set1_ph(1.); |
23333 | let b = _mm512_set1_ph(3.); |
23334 | let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b); |
23335 | let e = _mm512_set_ph( |
23336 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, |
23337 | 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23338 | ); |
23339 | assert_eq_m512h(r, e); |
23340 | } |
23341 | |
23342 | #[simd_test(enable = "avx512fp16" )] |
23343 | unsafe fn test_mm512_scalef_round_ph() { |
23344 | let a = _mm512_set1_ph(1.); |
23345 | let b = _mm512_set1_ph(3.); |
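// Explicit rounding control (round-to-nearest, exceptions suppressed) does not change
// the exact result: each lane is still 1.0 * 2^3 = 8.0.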
23346 | let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
23347 | let e = _mm512_set1_ph(8.0); |
23348 | assert_eq_m512h(r, e); |
23349 | } |
23350 | |
23351 | #[simd_test(enable = "avx512fp16" )] |
23352 | unsafe fn test_mm512_mask_scalef_round_ph() { |
23353 | let a = _mm512_set1_ph(1.); |
23354 | let b = _mm512_set1_ph(3.); |
23355 | let src = _mm512_set1_ph(2.); |
23356 | let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23357 | src, |
23358 | 0b01010101010101010101010101010101, |
23359 | a, |
23360 | b, |
23361 | ); |
23362 | let e = _mm512_set_ph( |
23363 | 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, |
23364 | 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, |
23365 | ); |
23366 | assert_eq_m512h(r, e); |
23367 | } |
23368 | |
23369 | #[simd_test(enable = "avx512fp16" )] |
23370 | unsafe fn test_mm512_maskz_scalef_round_ph() { |
23371 | let a = _mm512_set1_ph(1.); |
23372 | let b = _mm512_set1_ph(3.); |
23373 | let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23374 | 0b01010101010101010101010101010101, |
23375 | a, |
23376 | b, |
23377 | ); |
23378 | let e = _mm512_set_ph( |
23379 | 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, |
23380 | 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, |
23381 | ); |
23382 | assert_eq_m512h(r, e); |
23383 | } |
23384 | |
23385 | #[simd_test(enable = "avx512fp16" )] |
23386 | unsafe fn test_mm_scalef_sh() { |
23387 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23388 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
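// Scalar form: lane 0 becomes a[0] * 2^floor(b[0]) = 1.0 * 2^3 = 8.0; lanes 1..=7 come from a.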
23389 | let r = _mm_scalef_sh(a, b); |
23390 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23391 | assert_eq_m128h(r, e); |
23392 | } |
23393 | |
23394 | #[simd_test(enable = "avx512fp16" )] |
23395 | unsafe fn test_mm_mask_scalef_sh() { |
23396 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23397 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23398 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23399 | let r = _mm_mask_scalef_sh(src, 0, a, b); |
23400 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23401 | assert_eq_m128h(r, e); |
23402 | let r = _mm_mask_scalef_sh(src, 1, a, b); |
23403 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23404 | assert_eq_m128h(r, e); |
23405 | } |
23406 | |
23407 | #[simd_test(enable = "avx512fp16" )] |
23408 | unsafe fn test_mm_maskz_scalef_sh() { |
23409 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23410 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23411 | let r = _mm_maskz_scalef_sh(0, a, b); |
23412 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23413 | assert_eq_m128h(r, e); |
23414 | let r = _mm_maskz_scalef_sh(1, a, b); |
23415 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23416 | assert_eq_m128h(r, e); |
23417 | } |
23418 | |
23419 | #[simd_test(enable = "avx512fp16" )] |
23420 | unsafe fn test_mm_scalef_round_sh() { |
23421 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23422 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23423 | let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
23424 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23425 | assert_eq_m128h(r, e); |
23426 | } |
23427 | |
23428 | #[simd_test(enable = "avx512fp16" )] |
23429 | unsafe fn test_mm_mask_scalef_round_sh() { |
23430 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23431 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23432 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23433 | let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23434 | src, 0, a, b, |
23435 | ); |
23436 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23437 | assert_eq_m128h(r, e); |
23438 | let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
23439 | src, 1, a, b, |
23440 | ); |
23441 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23442 | assert_eq_m128h(r, e); |
23443 | } |
23444 | |
23445 | #[simd_test(enable = "avx512fp16" )] |
23446 | unsafe fn test_mm_maskz_scalef_round_sh() { |
23447 | let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.); |
23448 | let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.); |
23449 | let r = |
23450 | _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
23451 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23452 | assert_eq_m128h(r, e); |
23453 | let r = |
23454 | _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
23455 | let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.); |
23456 | assert_eq_m128h(r, e); |
23457 | } |
23458 | |
23459 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23460 | unsafe fn test_mm_reduce_ph() { |
23461 | let a = _mm_set1_ph(1.25); |
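// imm8 = 16 | _MM_FROUND_TO_ZERO keeps one fraction bit and truncates:
// 1.25 - trunc(1.25 * 2) / 2 = 1.25 - 1.0 = 0.25 in every lane.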
23462 | let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23463 | let e = _mm_set1_ph(0.25); |
23464 | assert_eq_m128h(r, e); |
23465 | } |
23466 | |
23467 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23468 | unsafe fn test_mm_mask_reduce_ph() { |
23469 | let a = _mm_set1_ph(1.25); |
23470 | let src = _mm_set1_ph(2.0); |
23471 | let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a); |
23472 | let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25); |
23473 | assert_eq_m128h(r, e); |
23474 | } |
23475 | |
23476 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23477 | unsafe fn test_mm_maskz_reduce_ph() { |
23478 | let a = _mm_set1_ph(1.25); |
23479 | let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a); |
23480 | let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25); |
23481 | assert_eq_m128h(r, e); |
23482 | } |
23483 | |
23484 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23485 | unsafe fn test_mm256_reduce_ph() { |
23486 | let a = _mm256_set1_ph(1.25); |
23487 | let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23488 | let e = _mm256_set1_ph(0.25); |
23489 | assert_eq_m256h(r, e); |
23490 | } |
23491 | |
23492 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23493 | unsafe fn test_mm256_mask_reduce_ph() { |
23494 | let a = _mm256_set1_ph(1.25); |
23495 | let src = _mm256_set1_ph(2.0); |
23496 | let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a); |
23497 | let e = _mm256_set_ph( |
23498 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23499 | ); |
23500 | assert_eq_m256h(r, e); |
23501 | } |
23502 | |
23503 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23504 | unsafe fn test_mm256_maskz_reduce_ph() { |
23505 | let a = _mm256_set1_ph(1.25); |
23506 | let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a); |
23507 | let e = _mm256_set_ph( |
23508 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23509 | ); |
23510 | assert_eq_m256h(r, e); |
23511 | } |
23512 | |
23513 | #[simd_test(enable = "avx512fp16" )] |
23514 | unsafe fn test_mm512_reduce_ph() { |
23515 | let a = _mm512_set1_ph(1.25); |
23516 | let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
23517 | let e = _mm512_set1_ph(0.25); |
23518 | assert_eq_m512h(r, e); |
23519 | } |
23520 | |
23521 | #[simd_test(enable = "avx512fp16" )] |
23522 | unsafe fn test_mm512_mask_reduce_ph() { |
23523 | let a = _mm512_set1_ph(1.25); |
23524 | let src = _mm512_set1_ph(2.0); |
23525 | let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( |
23526 | src, |
23527 | 0b01010101010101010101010101010101, |
23528 | a, |
23529 | ); |
23530 | let e = _mm512_set_ph( |
23531 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23532 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23533 | ); |
23534 | assert_eq_m512h(r, e); |
23535 | } |
23536 | |
23537 | #[simd_test(enable = "avx512fp16" )] |
23538 | unsafe fn test_mm512_maskz_reduce_ph() { |
23539 | let a = _mm512_set1_ph(1.25); |
23540 | let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>( |
23541 | 0b01010101010101010101010101010101, |
23542 | a, |
23543 | ); |
23544 | let e = _mm512_set_ph( |
23545 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23546 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23547 | ); |
23548 | assert_eq_m512h(r, e); |
23549 | } |
23550 | |
23551 | #[simd_test(enable = "avx512fp16" )] |
23552 | unsafe fn test_mm512_reduce_round_ph() { |
23553 | let a = _mm512_set1_ph(1.25); |
23554 | let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
23555 | let e = _mm512_set1_ph(0.25); |
23556 | assert_eq_m512h(r, e); |
23557 | } |
23558 | |
23559 | #[simd_test(enable = "avx512fp16" )] |
23560 | unsafe fn test_mm512_mask_reduce_round_ph() { |
23561 | let a = _mm512_set1_ph(1.25); |
23562 | let src = _mm512_set1_ph(2.0); |
23563 | let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23564 | src, |
23565 | 0b01010101010101010101010101010101, |
23566 | a, |
23567 | ); |
23568 | let e = _mm512_set_ph( |
23569 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23570 | 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, |
23571 | ); |
23572 | assert_eq_m512h(r, e); |
23573 | } |
23574 | |
23575 | #[simd_test(enable = "avx512fp16" )] |
23576 | unsafe fn test_mm512_maskz_reduce_round_ph() { |
23577 | let a = _mm512_set1_ph(1.25); |
23578 | let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23579 | 0b01010101010101010101010101010101, |
23580 | a, |
23581 | ); |
23582 | let e = _mm512_set_ph( |
23583 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23584 | 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, |
23585 | ); |
23586 | assert_eq_m512h(r, e); |
23587 | } |
23588 | |
23589 | #[simd_test(enable = "avx512fp16" )] |
23590 | unsafe fn test_mm_reduce_sh() { |
23591 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23592 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
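// Scalar reduce operates on the low lane of b (1.25 -> 0.25); the upper lanes come from a.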
23593 | let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
23594 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23595 | assert_eq_m128h(r, e); |
23596 | } |
23597 | |
23598 | #[simd_test(enable = "avx512fp16" )] |
23599 | unsafe fn test_mm_mask_reduce_sh() { |
23600 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23601 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23602 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23603 | let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b); |
23604 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23605 | assert_eq_m128h(r, e); |
23606 | let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b); |
23607 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23608 | assert_eq_m128h(r, e); |
23609 | } |
23610 | |
23611 | #[simd_test(enable = "avx512fp16" )] |
23612 | unsafe fn test_mm_maskz_reduce_sh() { |
23613 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23614 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23615 | let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b); |
23616 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23617 | assert_eq_m128h(r, e); |
23618 | let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b); |
23619 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23620 | assert_eq_m128h(r, e); |
23621 | } |
23622 | |
23623 | #[simd_test(enable = "avx512fp16" )] |
23624 | unsafe fn test_mm_reduce_round_sh() { |
23625 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23626 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23627 | let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
23628 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23629 | assert_eq_m128h(r, e); |
23630 | } |
23631 | |
23632 | #[simd_test(enable = "avx512fp16" )] |
23633 | unsafe fn test_mm_mask_reduce_round_sh() { |
23634 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23635 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23636 | let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.); |
23637 | let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23638 | src, 0, a, b, |
23639 | ); |
23640 | let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.); |
23641 | assert_eq_m128h(r, e); |
23642 | let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
23643 | src, 1, a, b, |
23644 | ); |
23645 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23646 | assert_eq_m128h(r, e); |
23647 | } |
23648 | |
23649 | #[simd_test(enable = "avx512fp16" )] |
23650 | unsafe fn test_mm_maskz_reduce_round_sh() { |
23651 | let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.); |
23652 | let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.); |
23653 | let r = |
23654 | _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b); |
23655 | let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.); |
23656 | assert_eq_m128h(r, e); |
23657 | let r = |
23658 | _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b); |
23659 | let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.); |
23660 | assert_eq_m128h(r, e); |
23661 | } |
23662 | |
23663 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23664 | unsafe fn test_mm_reduce_add_ph() { |
23665 | let a = _mm_set1_ph(2.0); |
23666 | let r = _mm_reduce_add_ph(a); |
23667 | assert_eq!(r, 16.0); |
23668 | } |
23669 | |
23670 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23671 | unsafe fn test_mm256_reduce_add_ph() { |
23672 | let a = _mm256_set1_ph(2.0); |
23673 | let r = _mm256_reduce_add_ph(a); |
23674 | assert_eq!(r, 32.0); |
23675 | } |
23676 | |
23677 | #[simd_test(enable = "avx512fp16" )] |
23678 | unsafe fn test_mm512_reduce_add_ph() { |
23679 | let a = _mm512_set1_ph(2.0); |
23680 | let r = _mm512_reduce_add_ph(a); |
23681 | assert_eq!(r, 64.0); |
23682 | } |
23683 | |
23684 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23685 | unsafe fn test_mm_reduce_mul_ph() { |
23686 | let a = _mm_set1_ph(2.0); |
23687 | let r = _mm_reduce_mul_ph(a); |
23688 | assert_eq!(r, 256.0); |
23689 | } |
23690 | |
23691 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23692 | unsafe fn test_mm256_reduce_mul_ph() { |
23693 | let a = _mm256_set1_ph(2.0); |
23694 | let r = _mm256_reduce_mul_ph(a); |
23695 | assert_eq!(r, 65536.0); |
23696 | } |
23697 | |
23698 | #[simd_test(enable = "avx512fp16" )] |
23699 | unsafe fn test_mm512_reduce_mul_ph() { |
23700 | let a = _mm512_set1_ph(2.0); |
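// 32 lanes of 2.0 multiply to 2.0^32 = 4294967296. This is far above f16::MAX (65504),
// so in half precision both the computed product and the expected literal end up as +infinity.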
23701 | let r = _mm512_reduce_mul_ph(a); |
assert_eq!(r, 4294967296.0);
23703 | } |
23704 | |
23705 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23706 | unsafe fn test_mm_reduce_max_ph() { |
23707 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23708 | let r = _mm_reduce_max_ph(a); |
23709 | assert_eq!(r, 8.0); |
23710 | } |
23711 | |
23712 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23713 | unsafe fn test_mm256_reduce_max_ph() { |
23714 | let a = _mm256_set_ph( |
23715 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23716 | ); |
23717 | let r = _mm256_reduce_max_ph(a); |
23718 | assert_eq!(r, 16.0); |
23719 | } |
23720 | |
23721 | #[simd_test(enable = "avx512fp16" )] |
23722 | unsafe fn test_mm512_reduce_max_ph() { |
23723 | let a = _mm512_set_ph( |
23724 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23725 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23726 | 31.0, 32.0, |
23727 | ); |
23728 | let r = _mm512_reduce_max_ph(a); |
23729 | assert_eq!(r, 32.0); |
23730 | } |
23731 | |
23732 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23733 | unsafe fn test_mm_reduce_min_ph() { |
23734 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23735 | let r = _mm_reduce_min_ph(a); |
23736 | assert_eq!(r, 1.0); |
23737 | } |
23738 | |
23739 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23740 | unsafe fn test_mm256_reduce_min_ph() { |
23741 | let a = _mm256_set_ph( |
23742 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23743 | ); |
23744 | let r = _mm256_reduce_min_ph(a); |
23745 | assert_eq!(r, 1.0); |
23746 | } |
23747 | |
23748 | #[simd_test(enable = "avx512fp16" )] |
23749 | unsafe fn test_mm512_reduce_min_ph() { |
23750 | let a = _mm512_set_ph( |
23751 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23752 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23753 | 31.0, 32.0, |
23754 | ); |
23755 | let r = _mm512_reduce_min_ph(a); |
23756 | assert_eq!(r, 1.0); |
23757 | } |
23758 | |
23759 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23760 | unsafe fn test_mm_fpclass_ph_mask() { |
23761 | let a = _mm_set_ph( |
23762 | 1., |
23763 | f16::INFINITY, |
23764 | f16::NEG_INFINITY, |
23765 | 0.0, |
23766 | -0.0, |
23767 | -2.0, |
23768 | f16::NAN, |
23769 | 5.9e-8, // Denormal |
23770 | ); |
23771 | let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities |
23772 | assert_eq!(r, 0b01100000); |
23773 | } |
23774 | |
23775 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23776 | unsafe fn test_mm_mask_fpclass_ph_mask() { |
23777 | let a = _mm_set_ph( |
23778 | 1., |
23779 | f16::INFINITY, |
23780 | f16::NEG_INFINITY, |
23781 | 0.0, |
23782 | -0.0, |
23783 | -2.0, |
23784 | f16::NAN, |
23785 | 5.9e-8, // Denormal |
23786 | ); |
23787 | let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a); |
23788 | assert_eq!(r, 0b01000000); |
23789 | } |
23790 | |
23791 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23792 | unsafe fn test_mm256_fpclass_ph_mask() { |
23793 | let a = _mm256_set_ph( |
23794 | 1., |
23795 | f16::INFINITY, |
23796 | f16::NEG_INFINITY, |
23797 | 0.0, |
23798 | -0.0, |
23799 | -2.0, |
23800 | f16::NAN, |
23801 | 5.9e-8, // Denormal |
23802 | 1., |
23803 | f16::INFINITY, |
23804 | f16::NEG_INFINITY, |
23805 | 0.0, |
23806 | -0.0, |
23807 | -2.0, |
23808 | f16::NAN, |
23809 | 5.9e-8, // Denormal |
23810 | ); |
23811 | let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities |
23812 | assert_eq!(r, 0b0110000001100000); |
23813 | } |
23814 | |
23815 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23816 | unsafe fn test_mm256_mask_fpclass_ph_mask() { |
23817 | let a = _mm256_set_ph( |
23818 | 1., |
23819 | f16::INFINITY, |
23820 | f16::NEG_INFINITY, |
23821 | 0.0, |
23822 | -0.0, |
23823 | -2.0, |
23824 | f16::NAN, |
23825 | 5.9e-8, // Denormal |
23826 | 1., |
23827 | f16::INFINITY, |
23828 | f16::NEG_INFINITY, |
23829 | 0.0, |
23830 | -0.0, |
23831 | -2.0, |
23832 | f16::NAN, |
23833 | 5.9e-8, // Denormal |
23834 | ); |
23835 | let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a); |
23836 | assert_eq!(r, 0b0100000001000000); |
23837 | } |
23838 | |
23839 | #[simd_test(enable = "avx512fp16" )] |
23840 | unsafe fn test_mm512_fpclass_ph_mask() { |
23841 | let a = _mm512_set_ph( |
23842 | 1., |
23843 | f16::INFINITY, |
23844 | f16::NEG_INFINITY, |
23845 | 0.0, |
23846 | -0.0, |
23847 | -2.0, |
23848 | f16::NAN, |
23849 | 5.9e-8, // Denormal |
23850 | 1., |
23851 | f16::INFINITY, |
23852 | f16::NEG_INFINITY, |
23853 | 0.0, |
23854 | -0.0, |
23855 | -2.0, |
23856 | f16::NAN, |
23857 | 5.9e-8, // Denormal |
23858 | 1., |
23859 | f16::INFINITY, |
23860 | f16::NEG_INFINITY, |
23861 | 0.0, |
23862 | -0.0, |
23863 | -2.0, |
23864 | f16::NAN, |
23865 | 5.9e-8, // Denormal |
23866 | 1., |
23867 | f16::INFINITY, |
23868 | f16::NEG_INFINITY, |
23869 | 0.0, |
23870 | -0.0, |
23871 | -2.0, |
23872 | f16::NAN, |
23873 | 5.9e-8, // Denormal |
23874 | ); |
23875 | let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities |
23876 | assert_eq!(r, 0b01100000011000000110000001100000); |
23877 | } |
23878 | |
23879 | #[simd_test(enable = "avx512fp16" )] |
23880 | unsafe fn test_mm512_mask_fpclass_ph_mask() { |
23881 | let a = _mm512_set_ph( |
23882 | 1., |
23883 | f16::INFINITY, |
23884 | f16::NEG_INFINITY, |
23885 | 0.0, |
23886 | -0.0, |
23887 | -2.0, |
23888 | f16::NAN, |
23889 | 5.9e-8, // Denormal |
23890 | 1., |
23891 | f16::INFINITY, |
23892 | f16::NEG_INFINITY, |
23893 | 0.0, |
23894 | -0.0, |
23895 | -2.0, |
23896 | f16::NAN, |
23897 | 5.9e-8, // Denormal |
23898 | 1., |
23899 | f16::INFINITY, |
23900 | f16::NEG_INFINITY, |
23901 | 0.0, |
23902 | -0.0, |
23903 | -2.0, |
23904 | f16::NAN, |
23905 | 5.9e-8, // Denormal |
23906 | 1., |
23907 | f16::INFINITY, |
23908 | f16::NEG_INFINITY, |
23909 | 0.0, |
23910 | -0.0, |
23911 | -2.0, |
23912 | f16::NAN, |
23913 | 5.9e-8, // Denormal |
23914 | ); |
23915 | let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a); |
23916 | assert_eq!(r, 0b01000000010000000100000001000000); |
23917 | } |
23918 | |
23919 | #[simd_test(enable = "avx512fp16" )] |
23920 | unsafe fn test_mm_fpclass_sh_mask() { |
23921 | let a = _mm_set_sh(f16::INFINITY); |
23922 | let r = _mm_fpclass_sh_mask::<0x18>(a); |
23923 | assert_eq!(r, 1); |
23924 | } |
23925 | |
23926 | #[simd_test(enable = "avx512fp16" )] |
23927 | unsafe fn test_mm_mask_fpclass_sh_mask() { |
23928 | let a = _mm_set_sh(f16::INFINITY); |
23929 | let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a); |
23930 | assert_eq!(r, 0); |
23931 | let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a); |
23932 | assert_eq!(r, 1); |
23933 | } |
23934 | |
23935 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23936 | unsafe fn test_mm_mask_blend_ph() { |
23937 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23938 | let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0); |
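// For each lane, a set mask bit selects the element from b and a clear bit selects from a.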
23939 | let r = _mm_mask_blend_ph(0b01010101, a, b); |
23940 | let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0); |
23941 | assert_eq_m128h(r, e); |
23942 | } |
23943 | |
23944 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23945 | unsafe fn test_mm256_mask_blend_ph() { |
23946 | let a = _mm256_set_ph( |
23947 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23948 | ); |
23949 | let b = _mm256_set_ph( |
23950 | -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, |
23951 | -14.0, -15.0, -16.0, |
23952 | ); |
23953 | let r = _mm256_mask_blend_ph(0b0101010101010101, a, b); |
23954 | let e = _mm256_set_ph( |
23955 | 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, |
23956 | -16.0, |
23957 | ); |
23958 | assert_eq_m256h(r, e); |
23959 | } |
23960 | |
23961 | #[simd_test(enable = "avx512fp16" )] |
23962 | unsafe fn test_mm512_mask_blend_ph() { |
23963 | let a = _mm512_set_ph( |
23964 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23965 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23966 | 31.0, 32.0, |
23967 | ); |
23968 | let b = _mm512_set_ph( |
23969 | -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, |
23970 | -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0, |
23971 | -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0, |
23972 | ); |
23973 | let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b); |
23974 | let e = _mm512_set_ph( |
23975 | 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0, |
23976 | -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0, |
23977 | 29.0, -30.0, 31.0, -32.0, |
23978 | ); |
23979 | assert_eq_m512h(r, e); |
23980 | } |
23981 | |
23982 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23983 | unsafe fn test_mm_permutex2var_ph() { |
23984 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
23985 | let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
23986 | let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14); |
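// Each index selects a lane from the 16-element concatenation of a (0..=7) and b (8..=15),
// so this gathers the even lanes of a followed by the even lanes of b.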
23987 | let r = _mm_permutex2var_ph(a, idx, b); |
23988 | let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); |
23989 | assert_eq_m128h(r, e); |
23990 | } |
23991 | |
23992 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
23993 | unsafe fn test_mm256_permutex2var_ph() { |
23994 | let a = _mm256_setr_ph( |
23995 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
23996 | ); |
23997 | let b = _mm256_setr_ph( |
23998 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
23999 | 31.0, 32.0, |
24000 | ); |
24001 | let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); |
24002 | let r = _mm256_permutex2var_ph(a, idx, b); |
24003 | let e = _mm256_setr_ph( |
24004 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24005 | 31.0, |
24006 | ); |
24007 | assert_eq_m256h(r, e); |
24008 | } |
24009 | |
24010 | #[simd_test(enable = "avx512fp16" )] |
24011 | unsafe fn test_mm512_permutex2var_ph() { |
24012 | let a = _mm512_setr_ph( |
24013 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24014 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24015 | 31.0, 32.0, |
24016 | ); |
24017 | let b = _mm512_setr_ph( |
24018 | 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, |
24019 | 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, |
24020 | 61.0, 62.0, 63.0, 64.0, |
24021 | ); |
24022 | let idx = _mm512_set_epi16( |
24023 | 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20, |
24024 | 18, 16, 14, 12, 10, 8, 6, 4, 2, 0, |
24025 | ); |
24026 | let r = _mm512_permutex2var_ph(a, idx, b); |
24027 | let e = _mm512_setr_ph( |
24028 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24029 | 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0, |
24030 | 59.0, 61.0, 63.0, |
24031 | ); |
24032 | assert_eq_m512h(r, e); |
24033 | } |
24034 | |
24035 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24036 | unsafe fn test_mm_permutexvar_ph() { |
24037 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24038 | let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7); |
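// Lane i of the result is a[idx[i]].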
24039 | let r = _mm_permutexvar_ph(idx, a); |
24040 | let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0); |
24041 | assert_eq_m128h(r, e); |
24042 | } |
24043 | |
24044 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24045 | unsafe fn test_mm256_permutexvar_ph() { |
24046 | let a = _mm256_set_ph( |
24047 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24048 | ); |
24049 | let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); |
24050 | let r = _mm256_permutexvar_ph(idx, a); |
24051 | let e = _mm256_setr_ph( |
24052 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, |
24053 | ); |
24054 | assert_eq_m256h(r, e); |
24055 | } |
24056 | |
24057 | #[simd_test(enable = "avx512fp16" )] |
24058 | unsafe fn test_mm512_permutexvar_ph() { |
24059 | let a = _mm512_set_ph( |
24060 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24061 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24062 | 31.0, 32.0, |
24063 | ); |
24064 | let idx = _mm512_set_epi16( |
24065 | 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15, |
24066 | 17, 19, 21, 23, 25, 27, 29, 31, |
24067 | ); |
24068 | let r = _mm512_permutexvar_ph(idx, a); |
24069 | let e = _mm512_setr_ph( |
24070 | 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0, |
24071 | 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, |
24072 | 30.0, 32.0, |
24073 | ); |
24074 | assert_eq_m512h(r, e); |
24075 | } |
24076 | |
24077 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24078 | unsafe fn test_mm_cvtepi16_ph() { |
24079 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24080 | let r = _mm_cvtepi16_ph(a); |
24081 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24082 | assert_eq_m128h(r, e); |
24083 | } |
24084 | |
24085 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24086 | unsafe fn test_mm_mask_cvtepi16_ph() { |
24087 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24088 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
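// Where the mask bit is clear, the lane is copied from src instead of being converted.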
24089 | let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a); |
24090 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24091 | assert_eq_m128h(r, e); |
24092 | } |
24093 | |
24094 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24095 | unsafe fn test_mm_maskz_cvtepi16_ph() { |
24096 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24097 | let r = _mm_maskz_cvtepi16_ph(0b01010101, a); |
24098 | let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); |
24099 | assert_eq_m128h(r, e); |
24100 | } |
24101 | |
24102 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24103 | unsafe fn test_mm256_cvtepi16_ph() { |
24104 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24105 | let r = _mm256_cvtepi16_ph(a); |
24106 | let e = _mm256_set_ph( |
24107 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24108 | ); |
24109 | assert_eq_m256h(r, e); |
24110 | } |
24111 | |
24112 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24113 | unsafe fn test_mm256_mask_cvtepi16_ph() { |
24114 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24115 | let src = _mm256_set_ph( |
24116 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24117 | ); |
24118 | let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a); |
24119 | let e = _mm256_set_ph( |
24120 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24121 | ); |
24122 | assert_eq_m256h(r, e); |
24123 | } |
24124 | |
24125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24126 | unsafe fn test_mm256_maskz_cvtepi16_ph() { |
24127 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24128 | let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a); |
24129 | let e = _mm256_set_ph( |
24130 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., |
24131 | ); |
24132 | assert_eq_m256h(r, e); |
24133 | } |
24134 | |
24135 | #[simd_test(enable = "avx512fp16" )] |
24136 | unsafe fn test_mm512_cvtepi16_ph() { |
24137 | let a = _mm512_set_epi16( |
24138 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24139 | 25, 26, 27, 28, 29, 30, 31, 32, |
24140 | ); |
24141 | let r = _mm512_cvtepi16_ph(a); |
24142 | let e = _mm512_set_ph( |
24143 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24144 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24145 | 31.0, 32.0, |
24146 | ); |
24147 | assert_eq_m512h(r, e); |
24148 | } |
24149 | |
24150 | #[simd_test(enable = "avx512fp16" )] |
24151 | unsafe fn test_mm512_mask_cvtepi16_ph() { |
24152 | let a = _mm512_set_epi16( |
24153 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24154 | 25, 26, 27, 28, 29, 30, 31, 32, |
24155 | ); |
24156 | let src = _mm512_set_ph( |
24157 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24158 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24159 | ); |
24160 | let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a); |
24161 | let e = _mm512_set_ph( |
24162 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24163 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24164 | ); |
24165 | assert_eq_m512h(r, e); |
24166 | } |
24167 | |
24168 | #[simd_test(enable = "avx512fp16" )] |
24169 | unsafe fn test_mm512_maskz_cvtepi16_ph() { |
24170 | let a = _mm512_set_epi16( |
24171 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24172 | 25, 26, 27, 28, 29, 30, 31, 32, |
24173 | ); |
24174 | let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a); |
24175 | let e = _mm512_set_ph( |
24176 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24177 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24178 | ); |
24179 | assert_eq_m512h(r, e); |
24180 | } |
24181 | |
24182 | #[simd_test(enable = "avx512fp16" )] |
24183 | unsafe fn test_mm512_cvt_roundepi16_ph() { |
24184 | let a = _mm512_set_epi16( |
24185 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24186 | 25, 26, 27, 28, 29, 30, 31, 32, |
24187 | ); |
24188 | let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24189 | let e = _mm512_set_ph( |
24190 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24191 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24192 | 31.0, 32.0, |
24193 | ); |
24194 | assert_eq_m512h(r, e); |
24195 | } |
24196 | |
24197 | #[simd_test(enable = "avx512fp16" )] |
24198 | unsafe fn test_mm512_mask_cvt_roundepi16_ph() { |
24199 | let a = _mm512_set_epi16( |
24200 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24201 | 25, 26, 27, 28, 29, 30, 31, 32, |
24202 | ); |
24203 | let src = _mm512_set_ph( |
24204 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24205 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24206 | ); |
24207 | let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24208 | src, |
24209 | 0b01010101010101010101010101010101, |
24210 | a, |
24211 | ); |
24212 | let e = _mm512_set_ph( |
24213 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24214 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24215 | ); |
24216 | assert_eq_m512h(r, e); |
24217 | } |
24218 | |
24219 | #[simd_test(enable = "avx512fp16" )] |
24220 | unsafe fn test_mm512_maskz_cvt_roundepi16_ph() { |
24221 | let a = _mm512_set_epi16( |
24222 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24223 | 25, 26, 27, 28, 29, 30, 31, 32, |
24224 | ); |
24225 | let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24226 | 0b01010101010101010101010101010101, |
24227 | a, |
24228 | ); |
24229 | let e = _mm512_set_ph( |
24230 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24231 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24232 | ); |
24233 | assert_eq_m512h(r, e); |
24234 | } |
24235 | |
24236 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24237 | unsafe fn test_mm_cvtepu16_ph() { |
24238 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24239 | let r = _mm_cvtepu16_ph(a); |
24240 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24241 | assert_eq_m128h(r, e); |
24242 | } |
24243 | |
24244 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24245 | unsafe fn test_mm_mask_cvtepu16_ph() { |
24246 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24247 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24248 | let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a); |
24249 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24250 | assert_eq_m128h(r, e); |
24251 | } |
24252 | |
24253 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24254 | unsafe fn test_mm_maskz_cvtepu16_ph() { |
24255 | let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
24256 | let r = _mm_maskz_cvtepu16_ph(0b01010101, a); |
24257 | let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.); |
24258 | assert_eq_m128h(r, e); |
24259 | } |
24260 | |
24261 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24262 | unsafe fn test_mm256_cvtepu16_ph() { |
24263 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24264 | let r = _mm256_cvtepu16_ph(a); |
24265 | let e = _mm256_set_ph( |
24266 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24267 | ); |
24268 | assert_eq_m256h(r, e); |
24269 | } |
24270 | |
24271 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24272 | unsafe fn test_mm256_mask_cvtepu16_ph() { |
24273 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24274 | let src = _mm256_set_ph( |
24275 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24276 | ); |
24277 | let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a); |
24278 | let e = _mm256_set_ph( |
24279 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24280 | ); |
24281 | assert_eq_m256h(r, e); |
24282 | } |
24283 | |
24284 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24285 | unsafe fn test_mm256_maskz_cvtepu16_ph() { |
24286 | let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24287 | let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a); |
24288 | let e = _mm256_set_ph( |
24289 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., |
24290 | ); |
24291 | assert_eq_m256h(r, e); |
24292 | } |
24293 | |
24294 | #[simd_test(enable = "avx512fp16" )] |
24295 | unsafe fn test_mm512_cvtepu16_ph() { |
24296 | let a = _mm512_set_epi16( |
24297 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24298 | 25, 26, 27, 28, 29, 30, 31, 32, |
24299 | ); |
24300 | let r = _mm512_cvtepu16_ph(a); |
24301 | let e = _mm512_set_ph( |
24302 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24303 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24304 | 31.0, 32.0, |
24305 | ); |
24306 | assert_eq_m512h(r, e); |
24307 | } |
24308 | |
24309 | #[simd_test(enable = "avx512fp16" )] |
24310 | unsafe fn test_mm512_mask_cvtepu16_ph() { |
24311 | let a = _mm512_set_epi16( |
24312 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24313 | 25, 26, 27, 28, 29, 30, 31, 32, |
24314 | ); |
24315 | let src = _mm512_set_ph( |
24316 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24317 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24318 | ); |
24319 | let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a); |
24320 | let e = _mm512_set_ph( |
24321 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24322 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24323 | ); |
24324 | assert_eq_m512h(r, e); |
24325 | } |
24326 | |
24327 | #[simd_test(enable = "avx512fp16" )] |
24328 | unsafe fn test_mm512_maskz_cvtepu16_ph() { |
24329 | let a = _mm512_set_epi16( |
24330 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24331 | 25, 26, 27, 28, 29, 30, 31, 32, |
24332 | ); |
24333 | let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a); |
24334 | let e = _mm512_set_ph( |
24335 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24336 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24337 | ); |
24338 | assert_eq_m512h(r, e); |
24339 | } |
24340 | |
24341 | #[simd_test(enable = "avx512fp16" )] |
24342 | unsafe fn test_mm512_cvt_roundepu16_ph() { |
24343 | let a = _mm512_set_epi16( |
24344 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24345 | 25, 26, 27, 28, 29, 30, 31, 32, |
24346 | ); |
24347 | let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24348 | let e = _mm512_set_ph( |
24349 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24350 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
24351 | 31.0, 32.0, |
24352 | ); |
24353 | assert_eq_m512h(r, e); |
24354 | } |
24355 | |
24356 | #[simd_test(enable = "avx512fp16" )] |
24357 | unsafe fn test_mm512_mask_cvt_roundepu16_ph() { |
24358 | let a = _mm512_set_epi16( |
24359 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24360 | 25, 26, 27, 28, 29, 30, 31, 32, |
24361 | ); |
24362 | let src = _mm512_set_ph( |
24363 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., |
24364 | 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., |
24365 | ); |
24366 | let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24367 | src, |
24368 | 0b01010101010101010101010101010101, |
24369 | a, |
24370 | ); |
24371 | let e = _mm512_set_ph( |
24372 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18., |
24373 | 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32., |
24374 | ); |
24375 | assert_eq_m512h(r, e); |
24376 | } |
24377 | |
24378 | #[simd_test(enable = "avx512fp16" )] |
24379 | unsafe fn test_mm512_maskz_cvt_roundepu16_ph() { |
24380 | let a = _mm512_set_epi16( |
24381 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
24382 | 25, 26, 27, 28, 29, 30, 31, 32, |
24383 | ); |
24384 | let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24385 | 0b01010101010101010101010101010101, |
24386 | a, |
24387 | ); |
24388 | let e = _mm512_set_ph( |
24389 | 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20., |
24390 | 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32., |
24391 | ); |
24392 | assert_eq_m512h(r, e); |
24393 | } |
24394 | |
24395 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24396 | unsafe fn test_mm_cvtepi32_ph() { |
24397 | let a = _mm_set_epi32(1, 2, 3, 4); |
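// Only four f16 lanes are produced; the upper four lanes of the 128-bit result are zeroed.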
24398 | let r = _mm_cvtepi32_ph(a); |
24399 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24400 | assert_eq_m128h(r, e); |
24401 | } |
24402 | |
24403 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24404 | unsafe fn test_mm_mask_cvtepi32_ph() { |
24405 | let a = _mm_set_epi32(1, 2, 3, 4); |
24406 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24407 | let r = _mm_mask_cvtepi32_ph(src, 0b0101, a); |
24408 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); |
24409 | assert_eq_m128h(r, e); |
24410 | } |
24411 | |
24412 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24413 | unsafe fn test_mm_maskz_cvtepi32_ph() { |
24414 | let a = _mm_set_epi32(1, 2, 3, 4); |
24415 | let r = _mm_maskz_cvtepi32_ph(0b0101, a); |
24416 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); |
24417 | assert_eq_m128h(r, e); |
24418 | } |
24419 | |
24420 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24421 | unsafe fn test_mm256_cvtepi32_ph() { |
24422 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24423 | let r = _mm256_cvtepi32_ph(a); |
24424 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24425 | assert_eq_m128h(r, e); |
24426 | } |
24427 | |
24428 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24429 | unsafe fn test_mm256_mask_cvtepi32_ph() { |
24430 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24431 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24432 | let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a); |
24433 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24434 | assert_eq_m128h(r, e); |
24435 | } |
24436 | |
24437 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24438 | unsafe fn test_mm256_maskz_cvtepi32_ph() { |
24439 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24440 | let r = _mm256_maskz_cvtepi32_ph(0b01010101, a); |
24441 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24442 | assert_eq_m128h(r, e); |
24443 | } |
24444 | |
24445 | #[simd_test(enable = "avx512fp16" )] |
24446 | unsafe fn test_mm512_cvtepi32_ph() { |
24447 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24448 | let r = _mm512_cvtepi32_ph(a); |
24449 | let e = _mm256_set_ph( |
24450 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24451 | ); |
24452 | assert_eq_m256h(r, e); |
24453 | } |
24454 | |
24455 | #[simd_test(enable = "avx512fp16" )] |
24456 | unsafe fn test_mm512_mask_cvtepi32_ph() { |
24457 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24458 | let src = _mm256_set_ph( |
24459 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24460 | ); |
24461 | let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a); |
24462 | let e = _mm256_set_ph( |
24463 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24464 | ); |
24465 | assert_eq_m256h(r, e); |
24466 | } |
24467 | |
24468 | #[simd_test(enable = "avx512fp16" )] |
24469 | unsafe fn test_mm512_maskz_cvtepi32_ph() { |
24470 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24471 | let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a); |
24472 | let e = _mm256_set_ph( |
24473 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24474 | ); |
24475 | assert_eq_m256h(r, e); |
24476 | } |
24477 | |
24478 | #[simd_test(enable = "avx512fp16" )] |
24479 | unsafe fn test_mm512_cvt_roundepi32_ph() { |
24480 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24481 | let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24482 | let e = _mm256_set_ph( |
24483 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24484 | ); |
24485 | assert_eq_m256h(r, e); |
24486 | } |
24487 | |
24488 | #[simd_test(enable = "avx512fp16" )] |
24489 | unsafe fn test_mm512_mask_cvt_roundepi32_ph() { |
24490 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24491 | let src = _mm256_set_ph( |
24492 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24493 | ); |
24494 | let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24495 | src, |
24496 | 0b0101010101010101, |
24497 | a, |
24498 | ); |
24499 | let e = _mm256_set_ph( |
24500 | 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., |
24501 | ); |
24502 | assert_eq_m256h(r, e); |
24503 | } |
24504 | |
24505 | #[simd_test(enable = "avx512fp16" )] |
24506 | unsafe fn test_mm512_maskz_cvt_roundepi32_ph() { |
24507 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24508 | let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24509 | 0b0101010101010101, |
24510 | a, |
24511 | ); |
24512 | let e = _mm256_set_ph( |
24513 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24514 | ); |
24515 | assert_eq_m256h(r, e); |
24516 | } |
24517 | |
24518 | #[simd_test(enable = "avx512fp16" )] |
24519 | unsafe fn test_mm_cvti32_sh() { |
24520 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
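// The integer argument is converted into lane 0; lanes 1..=7 are copied from a.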
24521 | let r = _mm_cvti32_sh(a, 10); |
24522 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24523 | assert_eq_m128h(r, e); |
24524 | } |
24525 | |
24526 | #[simd_test(enable = "avx512fp16" )] |
24527 | unsafe fn test_mm_cvt_roundi32_sh() { |
24528 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24529 | let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); |
24530 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24531 | assert_eq_m128h(r, e); |
24532 | } |
24533 | |
24534 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24535 | unsafe fn test_mm_cvtepu32_ph() { |
24536 | let a = _mm_set_epi32(1, 2, 3, 4); |
24537 | let r = _mm_cvtepu32_ph(a); |
24538 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24539 | assert_eq_m128h(r, e); |
24540 | } |
24541 | |
24542 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24543 | unsafe fn test_mm_mask_cvtepu32_ph() { |
24544 | let a = _mm_set_epi32(1, 2, 3, 4); |
24545 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24546 | let r = _mm_mask_cvtepu32_ph(src, 0b0101, a); |
24547 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.); |
24548 | assert_eq_m128h(r, e); |
24549 | } |
24550 | |
24551 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24552 | unsafe fn test_mm_maskz_cvtepu32_ph() { |
24553 | let a = _mm_set_epi32(1, 2, 3, 4); |
24554 | let r = _mm_maskz_cvtepu32_ph(0b0101, a); |
24555 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.); |
24556 | assert_eq_m128h(r, e); |
24557 | } |
24558 | |
24559 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24560 | unsafe fn test_mm256_cvtepu32_ph() { |
24561 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24562 | let r = _mm256_cvtepu32_ph(a); |
24563 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24564 | assert_eq_m128h(r, e); |
24565 | } |
24566 | |
24567 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24568 | unsafe fn test_mm256_mask_cvtepu32_ph() { |
24569 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24570 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24571 | let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a); |
24572 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24573 | assert_eq_m128h(r, e); |
24574 | } |
24575 | |
24576 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24577 | unsafe fn test_mm256_maskz_cvtepu32_ph() { |
24578 | let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
24579 | let r = _mm256_maskz_cvtepu32_ph(0b01010101, a); |
24580 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24581 | assert_eq_m128h(r, e); |
24582 | } |
24583 | |
24584 | #[simd_test(enable = "avx512fp16" )] |
24585 | unsafe fn test_mm512_cvtepu32_ph() { |
24586 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24587 | let r = _mm512_cvtepu32_ph(a); |
24588 | let e = _mm256_set_ph( |
24589 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24590 | ); |
24591 | assert_eq_m256h(r, e); |
24592 | } |
24593 | |
24594 | #[simd_test(enable = "avx512fp16" )] |
24595 | unsafe fn test_mm512_mask_cvtepu32_ph() { |
24596 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24597 | let src = _mm256_set_ph( |
24598 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24599 | ); |
24600 | let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a); |
24601 | let e = _mm256_set_ph( |
24602 | 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, |
24603 | ); |
24604 | assert_eq_m256h(r, e); |
24605 | } |
24606 | |
24607 | #[simd_test(enable = "avx512fp16" )] |
24608 | unsafe fn test_mm512_maskz_cvtepu32_ph() { |
24609 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24610 | let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a); |
24611 | let e = _mm256_set_ph( |
24612 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24613 | ); |
24614 | assert_eq_m256h(r, e); |
24615 | } |
24616 | |
24617 | #[simd_test(enable = "avx512fp16" )] |
24618 | unsafe fn test_mm512_cvt_roundepu32_ph() { |
24619 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24620 | let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24621 | let e = _mm256_set_ph( |
24622 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24623 | ); |
24624 | assert_eq_m256h(r, e); |
24625 | } |
24626 | |
24627 | #[simd_test(enable = "avx512fp16" )] |
24628 | unsafe fn test_mm512_mask_cvt_roundepu32_ph() { |
24629 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24630 | let src = _mm256_set_ph( |
24631 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24632 | ); |
24633 | let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24634 | src, |
24635 | 0b0101010101010101, |
24636 | a, |
24637 | ); |
24638 | let e = _mm256_set_ph( |
24639 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
24640 | 16.0, |
24641 | ); |
24642 | assert_eq_m256h(r, e); |
24643 | } |
24644 | |
24645 | #[simd_test(enable = "avx512fp16" )] |
24646 | unsafe fn test_mm512_maskz_cvt_roundepu32_ph() { |
24647 | let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
24648 | let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24649 | 0b0101010101010101, |
24650 | a, |
24651 | ); |
24652 | let e = _mm256_set_ph( |
24653 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24654 | ); |
24655 | assert_eq_m256h(r, e); |
24656 | } |
24657 | |
24658 | #[simd_test(enable = "avx512fp16" )] |
24659 | unsafe fn test_mm_cvtu32_sh() { |
24660 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24661 | let r = _mm_cvtu32_sh(a, 10); |
24662 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24663 | assert_eq_m128h(r, e); |
24664 | } |
24665 | |
24666 | #[simd_test(enable = "avx512fp16" )] |
24667 | unsafe fn test_mm_cvt_roundu32_sh() { |
24668 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24669 | let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10); |
24670 | let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24671 | assert_eq_m128h(r, e); |
24672 | } |
24673 | |
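// 64-bit integer sources yield one f16 per element, so the 128-bit and 256-bit forms fill
// only the low two and four lanes of the `__m128h` result respectively; the remaining
// lanes are zeroed, as the expected vectors below show.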
24674 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24675 | unsafe fn test_mm_cvtepi64_ph() { |
24676 | let a = _mm_set_epi64x(1, 2); |
24677 | let r = _mm_cvtepi64_ph(a); |
24678 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
24679 | assert_eq_m128h(r, e); |
24680 | } |
24681 | |
24682 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24683 | unsafe fn test_mm_mask_cvtepi64_ph() { |
24684 | let a = _mm_set_epi64x(1, 2); |
24685 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24686 | let r = _mm_mask_cvtepi64_ph(src, 0b01, a); |
24687 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
24688 | assert_eq_m128h(r, e); |
24689 | } |
24690 | |
24691 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24692 | unsafe fn test_mm_maskz_cvtepi64_ph() { |
24693 | let a = _mm_set_epi64x(1, 2); |
24694 | let r = _mm_maskz_cvtepi64_ph(0b01, a); |
24695 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.); |
24696 | assert_eq_m128h(r, e); |
24697 | } |
24698 | |
24699 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24700 | unsafe fn test_mm256_cvtepi64_ph() { |
24701 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24702 | let r = _mm256_cvtepi64_ph(a); |
24703 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24704 | assert_eq_m128h(r, e); |
24705 | } |
24706 | |
24707 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24708 | unsafe fn test_mm256_mask_cvtepi64_ph() { |
24709 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24710 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24711 | let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a); |
24712 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
24713 | assert_eq_m128h(r, e); |
24714 | } |
24715 | |
24716 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24717 | unsafe fn test_mm256_maskz_cvtepi64_ph() { |
24718 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24719 | let r = _mm256_maskz_cvtepi64_ph(0b0101, a); |
24720 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24721 | assert_eq_m128h(r, e); |
24722 | } |
24723 | |
24724 | #[simd_test(enable = "avx512fp16" )] |
24725 | unsafe fn test_mm512_cvtepi64_ph() { |
24726 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24727 | let r = _mm512_cvtepi64_ph(a); |
24728 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24729 | assert_eq_m128h(r, e); |
24730 | } |
24731 | |
24732 | #[simd_test(enable = "avx512fp16" )] |
24733 | unsafe fn test_mm512_mask_cvtepi64_ph() { |
24734 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24735 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24736 | let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a); |
24737 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24738 | assert_eq_m128h(r, e); |
24739 | } |
24740 | |
24741 | #[simd_test(enable = "avx512fp16" )] |
24742 | unsafe fn test_mm512_maskz_cvtepi64_ph() { |
24743 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24744 | let r = _mm512_maskz_cvtepi64_ph(0b01010101, a); |
24745 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24746 | assert_eq_m128h(r, e); |
24747 | } |
24748 | |
24749 | #[simd_test(enable = "avx512fp16" )] |
24750 | unsafe fn test_mm512_cvt_roundepi64_ph() { |
24751 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24752 | let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24753 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24754 | assert_eq_m128h(r, e); |
24755 | } |
24756 | |
24757 | #[simd_test(enable = "avx512fp16" )] |
24758 | unsafe fn test_mm512_mask_cvt_roundepi64_ph() { |
24759 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24760 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24761 | let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24762 | src, 0b01010101, a, |
24763 | ); |
24764 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24765 | assert_eq_m128h(r, e); |
24766 | } |
24767 | |
24768 | #[simd_test(enable = "avx512fp16" )] |
24769 | unsafe fn test_mm512_maskz_cvt_roundepi64_ph() { |
24770 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24771 | let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24772 | 0b01010101, a, |
24773 | ); |
24774 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24775 | assert_eq_m128h(r, e); |
24776 | } |
24777 | |
24778 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24779 | unsafe fn test_mm_cvtepu64_ph() { |
24780 | let a = _mm_set_epi64x(1, 2); |
24781 | let r = _mm_cvtepu64_ph(a); |
24782 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
24783 | assert_eq_m128h(r, e); |
24784 | } |
24785 | |
24786 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24787 | unsafe fn test_mm_mask_cvtepu64_ph() { |
24788 | let a = _mm_set_epi64x(1, 2); |
24789 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24790 | let r = _mm_mask_cvtepu64_ph(src, 0b01, a); |
24791 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
24792 | assert_eq_m128h(r, e); |
24793 | } |
24794 | |
24795 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24796 | unsafe fn test_mm_maskz_cvtepu64_ph() { |
24797 | let a = _mm_set_epi64x(1, 2); |
24798 | let r = _mm_maskz_cvtepu64_ph(0b01, a); |
24799 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); |
24800 | assert_eq_m128h(r, e); |
24801 | } |
24802 | |
24803 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24804 | unsafe fn test_mm256_cvtepu64_ph() { |
24805 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24806 | let r = _mm256_cvtepu64_ph(a); |
24807 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24808 | assert_eq_m128h(r, e); |
24809 | } |
24810 | |
24811 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24812 | unsafe fn test_mm256_mask_cvtepu64_ph() { |
24813 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24814 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24815 | let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a); |
24816 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
24817 | assert_eq_m128h(r, e); |
24818 | } |
24819 | |
24820 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24821 | unsafe fn test_mm256_maskz_cvtepu64_ph() { |
24822 | let a = _mm256_set_epi64x(1, 2, 3, 4); |
24823 | let r = _mm256_maskz_cvtepu64_ph(0b0101, a); |
24824 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24825 | assert_eq_m128h(r, e); |
24826 | } |
24827 | |
24828 | #[simd_test(enable = "avx512fp16" )] |
24829 | unsafe fn test_mm512_cvtepu64_ph() { |
24830 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24831 | let r = _mm512_cvtepu64_ph(a); |
24832 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24833 | assert_eq_m128h(r, e); |
24834 | } |
24835 | |
24836 | #[simd_test(enable = "avx512fp16" )] |
24837 | unsafe fn test_mm512_mask_cvtepu64_ph() { |
24838 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24839 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24840 | let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a); |
24841 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24842 | assert_eq_m128h(r, e); |
24843 | } |
24844 | |
24845 | #[simd_test(enable = "avx512fp16" )] |
24846 | unsafe fn test_mm512_maskz_cvtepu64_ph() { |
24847 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24848 | let r = _mm512_maskz_cvtepu64_ph(0b01010101, a); |
24849 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24850 | assert_eq_m128h(r, e); |
24851 | } |
24852 | |
24853 | #[simd_test(enable = "avx512fp16" )] |
24854 | unsafe fn test_mm512_cvt_roundepu64_ph() { |
24855 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24856 | let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24857 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24858 | assert_eq_m128h(r, e); |
24859 | } |
24860 | |
24861 | #[simd_test(enable = "avx512fp16" )] |
24862 | unsafe fn test_mm512_mask_cvt_roundepu64_ph() { |
24863 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24864 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24865 | let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24866 | src, 0b01010101, a, |
24867 | ); |
24868 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24869 | assert_eq_m128h(r, e); |
24870 | } |
24871 | |
24872 | #[simd_test(enable = "avx512fp16" )] |
24873 | unsafe fn test_mm512_maskz_cvt_roundepu64_ph() { |
24874 | let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
24875 | let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24876 | 0b01010101, a, |
24877 | ); |
24878 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
24879 | assert_eq_m128h(r, e); |
24880 | } |
24881 | |
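// The `x` in `_mm*_cvtxps_ph` marks the AVX512-FP16 f32 -> f16 conversions that return an
// `__m128h`/`__m256h` directly, as opposed to the older F16C `_mm*_cvtps_ph` intrinsics,
// which take a rounding immediate and return the raw f16 bit patterns in an integer vector.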
24882 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24883 | unsafe fn test_mm_cvtxps_ph() { |
24884 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24885 | let r = _mm_cvtxps_ph(a); |
24886 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
24887 | assert_eq_m128h(r, e); |
24888 | } |
24889 | |
24890 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24891 | unsafe fn test_mm_mask_cvtxps_ph() { |
24892 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24893 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24894 | let r = _mm_mask_cvtxps_ph(src, 0b0101, a); |
24895 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0); |
24896 | assert_eq_m128h(r, e); |
24897 | } |
24898 | |
24899 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24900 | unsafe fn test_mm_maskz_cvtxps_ph() { |
24901 | let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
24902 | let r = _mm_maskz_cvtxps_ph(0b0101, a); |
24903 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
24904 | assert_eq_m128h(r, e); |
24905 | } |
24906 | |
24907 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24908 | unsafe fn test_mm256_cvtxps_ph() { |
24909 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24910 | let r = _mm256_cvtxps_ph(a); |
24911 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24912 | assert_eq_m128h(r, e); |
24913 | } |
24914 | |
24915 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24916 | unsafe fn test_mm256_mask_cvtxps_ph() { |
24917 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24918 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
24919 | let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a); |
24920 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
24921 | assert_eq_m128h(r, e); |
24922 | } |
24923 | |
24924 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
24925 | unsafe fn test_mm256_maskz_cvtxps_ph() { |
24926 | let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
24927 | let r = _mm256_maskz_cvtxps_ph(0b01010101, a); |
24928 | let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
24929 | assert_eq_m128h(r, e); |
24930 | } |
24931 | |
24932 | #[simd_test(enable = "avx512fp16" )] |
24933 | unsafe fn test_mm512_cvtxps_ph() { |
24934 | let a = _mm512_set_ps( |
24935 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24936 | ); |
24937 | let r = _mm512_cvtxps_ph(a); |
24938 | let e = _mm256_set_ph( |
24939 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24940 | ); |
24941 | assert_eq_m256h(r, e); |
24942 | } |
24943 | |
24944 | #[simd_test(enable = "avx512fp16" )] |
24945 | unsafe fn test_mm512_mask_cvtxps_ph() { |
24946 | let a = _mm512_set_ps( |
24947 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24948 | ); |
24949 | let src = _mm256_set_ph( |
24950 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24951 | ); |
24952 | let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a); |
24953 | let e = _mm256_set_ph( |
24954 | 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0, |
24955 | ); |
24956 | assert_eq_m256h(r, e); |
24957 | } |
24958 | |
24959 | #[simd_test(enable = "avx512fp16" )] |
24960 | unsafe fn test_mm512_maskz_cvtxps_ph() { |
24961 | let a = _mm512_set_ps( |
24962 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24963 | ); |
24964 | let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a); |
24965 | let e = _mm256_set_ph( |
24966 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
24967 | ); |
24968 | assert_eq_m256h(r, e); |
24969 | } |
24970 | |
24971 | #[simd_test(enable = "avx512fp16" )] |
24972 | unsafe fn test_mm512_cvtx_roundps_ph() { |
24973 | let a = _mm512_set_ps( |
24974 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24975 | ); |
24976 | let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
24977 | let e = _mm256_set_ph( |
24978 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24979 | ); |
24980 | assert_eq_m256h(r, e); |
24981 | } |
24982 | |
24983 | #[simd_test(enable = "avx512fp16" )] |
24984 | unsafe fn test_mm512_mask_cvtx_roundps_ph() { |
24985 | let a = _mm512_set_ps( |
24986 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
24987 | ); |
24988 | let src = _mm256_set_ph( |
24989 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., |
24990 | ); |
24991 | let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
24992 | src, |
24993 | 0b0101010101010101, |
24994 | a, |
24995 | ); |
24996 | let e = _mm256_set_ph( |
24997 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
24998 | 16.0, |
24999 | ); |
25000 | assert_eq_m256h(r, e); |
25001 | } |
25002 | |
25003 | #[simd_test(enable = "avx512fp16" )] |
25004 | unsafe fn test_mm512_maskz_cvtx_roundps_ph() { |
25005 | let a = _mm512_set_ps( |
25006 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25007 | ); |
25008 | let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25009 | 0b0101010101010101, |
25010 | a, |
25011 | ); |
25012 | let e = _mm256_set_ph( |
25013 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
25014 | ); |
25015 | assert_eq_m256h(r, e); |
25016 | } |
25017 | |
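// Scalar conversions only replace lane 0 of the destination; the upper seven f16 lanes are
// copied from `a`. The mask variants use a single mask bit to choose between the converted
// value, lane 0 of `src` (`mask`), or zero (`maskz`), which is why each test checks both
// mask values.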
25018 | #[simd_test(enable = "avx512fp16" )] |
25019 | unsafe fn test_mm_cvtss_sh() { |
25020 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25021 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25022 | let r = _mm_cvtss_sh(a, b); |
25023 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25024 | assert_eq_m128h(r, e); |
25025 | } |
25026 | |
25027 | #[simd_test(enable = "avx512fp16" )] |
25028 | unsafe fn test_mm_mask_cvtss_sh() { |
25029 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25030 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25031 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25032 | let r = _mm_mask_cvtss_sh(src, 0, a, b); |
25033 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25034 | assert_eq_m128h(r, e); |
25035 | let r = _mm_mask_cvtss_sh(src, 1, a, b); |
25036 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25037 | assert_eq_m128h(r, e); |
25038 | } |
25039 | |
25040 | #[simd_test(enable = "avx512fp16" )] |
25041 | unsafe fn test_mm_maskz_cvtss_sh() { |
25042 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25043 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25044 | let r = _mm_maskz_cvtss_sh(0, a, b); |
25045 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25046 | assert_eq_m128h(r, e); |
25047 | let r = _mm_maskz_cvtss_sh(1, a, b); |
25048 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25049 | assert_eq_m128h(r, e); |
25050 | } |
25051 | |
25052 | #[simd_test(enable = "avx512fp16" )] |
25053 | unsafe fn test_mm_cvt_roundss_sh() { |
25054 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25055 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25056 | let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
25057 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25058 | assert_eq_m128h(r, e); |
25059 | } |
25060 | |
25061 | #[simd_test(enable = "avx512fp16" )] |
25062 | unsafe fn test_mm_mask_cvt_roundss_sh() { |
25063 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25064 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25065 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25066 | let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25067 | src, 0, a, b, |
25068 | ); |
25069 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25070 | assert_eq_m128h(r, e); |
25071 | let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25072 | src, 1, a, b, |
25073 | ); |
25074 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25075 | assert_eq_m128h(r, e); |
25076 | } |
25077 | |
25078 | #[simd_test(enable = "avx512fp16" )] |
25079 | unsafe fn test_mm_maskz_cvt_roundss_sh() { |
25080 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25081 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
25082 | let r = |
25083 | _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
25084 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25085 | assert_eq_m128h(r, e); |
25086 | let r = |
25087 | _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
25088 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25089 | assert_eq_m128h(r, e); |
25090 | } |
25091 | |
25092 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25093 | unsafe fn test_mm_cvtpd_ph() { |
25094 | let a = _mm_set_pd(1.0, 2.0); |
25095 | let r = _mm_cvtpd_ph(a); |
25096 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
25097 | assert_eq_m128h(r, e); |
25098 | } |
25099 | |
25100 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25101 | unsafe fn test_mm_mask_cvtpd_ph() { |
25102 | let a = _mm_set_pd(1.0, 2.0); |
25103 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25104 | let r = _mm_mask_cvtpd_ph(src, 0b01, a); |
25105 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.); |
25106 | assert_eq_m128h(r, e); |
25107 | } |
25108 | |
25109 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25110 | unsafe fn test_mm_maskz_cvtpd_ph() { |
25111 | let a = _mm_set_pd(1.0, 2.0); |
25112 | let r = _mm_maskz_cvtpd_ph(0b01, a); |
25113 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0); |
25114 | assert_eq_m128h(r, e); |
25115 | } |
25116 | |
25117 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25118 | unsafe fn test_mm256_cvtpd_ph() { |
25119 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25120 | let r = _mm256_cvtpd_ph(a); |
25121 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25122 | assert_eq_m128h(r, e); |
25123 | } |
25124 | |
25125 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25126 | unsafe fn test_mm256_mask_cvtpd_ph() { |
25127 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25128 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25129 | let r = _mm256_mask_cvtpd_ph(src, 0b0101, a); |
25130 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0); |
25131 | assert_eq_m128h(r, e); |
25132 | } |
25133 | |
25134 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25135 | unsafe fn test_mm256_maskz_cvtpd_ph() { |
25136 | let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
25137 | let r = _mm256_maskz_cvtpd_ph(0b0101, a); |
25138 | let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0); |
25139 | assert_eq_m128h(r, e); |
25140 | } |
25141 | |
25142 | #[simd_test(enable = "avx512fp16" )] |
25143 | unsafe fn test_mm512_cvtpd_ph() { |
25144 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25145 | let r = _mm512_cvtpd_ph(a); |
25146 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25147 | assert_eq_m128h(r, e); |
25148 | } |
25149 | |
25150 | #[simd_test(enable = "avx512fp16" )] |
25151 | unsafe fn test_mm512_mask_cvtpd_ph() { |
25152 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25153 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25154 | let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a); |
25155 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
25156 | assert_eq_m128h(r, e); |
25157 | } |
25158 | |
25159 | #[simd_test(enable = "avx512fp16" )] |
25160 | unsafe fn test_mm512_maskz_cvtpd_ph() { |
25161 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25162 | let r = _mm512_maskz_cvtpd_ph(0b01010101, a); |
25163 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
25164 | assert_eq_m128h(r, e); |
25165 | } |
25166 | |
25167 | #[simd_test(enable = "avx512fp16" )] |
25168 | unsafe fn test_mm512_cvt_roundpd_ph() { |
25169 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25170 | let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25171 | let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25172 | assert_eq_m128h(r, e); |
25173 | } |
25174 | |
25175 | #[simd_test(enable = "avx512fp16" )] |
25176 | unsafe fn test_mm512_mask_cvt_roundpd_ph() { |
25177 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25178 | let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25179 | let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25180 | src, 0b01010101, a, |
25181 | ); |
25182 | let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.); |
25183 | assert_eq_m128h(r, e); |
25184 | } |
25185 | |
25186 | #[simd_test(enable = "avx512fp16" )] |
25187 | unsafe fn test_mm512_maskz_cvt_roundpd_ph() { |
25188 | let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25189 | let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25190 | 0b01010101, a, |
25191 | ); |
25192 | let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.); |
25193 | assert_eq_m128h(r, e); |
25194 | } |
25195 | |
25196 | #[simd_test(enable = "avx512fp16" )] |
25197 | unsafe fn test_mm_cvtsd_sh() { |
25198 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25199 | let b = _mm_setr_pd(1.0, 2.0); |
25200 | let r = _mm_cvtsd_sh(a, b); |
25201 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25202 | assert_eq_m128h(r, e); |
25203 | } |
25204 | |
25205 | #[simd_test(enable = "avx512fp16" )] |
25206 | unsafe fn test_mm_mask_cvtsd_sh() { |
25207 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25208 | let b = _mm_setr_pd(1.0, 2.0); |
25209 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25210 | let r = _mm_mask_cvtsd_sh(src, 0, a, b); |
25211 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25212 | assert_eq_m128h(r, e); |
25213 | let r = _mm_mask_cvtsd_sh(src, 1, a, b); |
25214 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25215 | assert_eq_m128h(r, e); |
25216 | } |
25217 | |
25218 | #[simd_test(enable = "avx512fp16" )] |
25219 | unsafe fn test_mm_maskz_cvtsd_sh() { |
25220 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25221 | let b = _mm_setr_pd(1.0, 2.0); |
25222 | let r = _mm_maskz_cvtsd_sh(0, a, b); |
25223 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25224 | assert_eq_m128h(r, e); |
25225 | let r = _mm_maskz_cvtsd_sh(1, a, b); |
25226 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25227 | assert_eq_m128h(r, e); |
25228 | } |
25229 | |
25230 | #[simd_test(enable = "avx512fp16" )] |
25231 | unsafe fn test_mm_cvt_roundsd_sh() { |
25232 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25233 | let b = _mm_setr_pd(1.0, 2.0); |
25234 | let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); |
25235 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25236 | assert_eq_m128h(r, e); |
25237 | } |
25238 | |
25239 | #[simd_test(enable = "avx512fp16" )] |
25240 | unsafe fn test_mm_mask_cvt_roundsd_sh() { |
25241 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25242 | let b = _mm_setr_pd(1.0, 2.0); |
25243 | let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.); |
25244 | let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25245 | src, 0, a, b, |
25246 | ); |
25247 | let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.); |
25248 | assert_eq_m128h(r, e); |
25249 | let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25250 | src, 1, a, b, |
25251 | ); |
25252 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25253 | assert_eq_m128h(r, e); |
25254 | } |
25255 | |
25256 | #[simd_test(enable = "avx512fp16" )] |
25257 | unsafe fn test_mm_maskz_cvt_roundsd_sh() { |
25258 | let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.); |
25259 | let b = _mm_setr_pd(1.0, 2.0); |
25260 | let r = |
25261 | _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); |
25262 | let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.); |
25263 | assert_eq_m128h(r, e); |
25264 | let r = |
25265 | _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b); |
25266 | let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.); |
25267 | assert_eq_m128h(r, e); |
25268 | } |
25269 | |
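// From here on the tests cover f16 -> integer conversions. The `cvtph` family rounds
// according to the current (or explicitly supplied) rounding mode, while the `cvttph`
// family further below truncates toward zero; the inputs are whole numbers, so both
// families are expected to produce identical integer vectors in these tests.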
25270 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25271 | unsafe fn test_mm_cvtph_epi16() { |
25272 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25273 | let r = _mm_cvtph_epi16(a); |
25274 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25275 | assert_eq_m128i(r, e); |
25276 | } |
25277 | |
25278 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25279 | unsafe fn test_mm_mask_cvtph_epi16() { |
25280 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25281 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25282 | let r = _mm_mask_cvtph_epi16(src, 0b01010101, a); |
25283 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25284 | assert_eq_m128i(r, e); |
25285 | } |
25286 | |
25287 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25288 | unsafe fn test_mm_maskz_cvtph_epi16() { |
25289 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25290 | let r = _mm_maskz_cvtph_epi16(0b01010101, a); |
25291 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25292 | assert_eq_m128i(r, e); |
25293 | } |
25294 | |
25295 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25296 | unsafe fn test_mm256_cvtph_epi16() { |
25297 | let a = _mm256_set_ph( |
25298 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25299 | ); |
25300 | let r = _mm256_cvtph_epi16(a); |
25301 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25302 | assert_eq_m256i(r, e); |
25303 | } |
25304 | |
25305 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25306 | unsafe fn test_mm256_mask_cvtph_epi16() { |
25307 | let a = _mm256_set_ph( |
25308 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25309 | ); |
25310 | let src = _mm256_set_epi16( |
25311 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25312 | ); |
25313 | let r = _mm256_mask_cvtph_epi16(src, 0b0101010101010101, a); |
25314 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25315 | assert_eq_m256i(r, e); |
25316 | } |
25317 | |
25318 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25319 | unsafe fn test_mm256_maskz_cvtph_epi16() { |
25320 | let a = _mm256_set_ph( |
25321 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25322 | ); |
25323 | let r = _mm256_maskz_cvtph_epi16(0b0101010101010101, a); |
25324 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25325 | assert_eq_m256i(r, e); |
25326 | } |
25327 | |
25328 | #[simd_test(enable = "avx512fp16" )] |
25329 | unsafe fn test_mm512_cvtph_epi16() { |
25330 | let a = _mm512_set_ph( |
25331 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25332 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25333 | 31.0, 32.0, |
25334 | ); |
25335 | let r = _mm512_cvtph_epi16(a); |
25336 | let e = _mm512_set_epi16( |
25337 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25338 | 25, 26, 27, 28, 29, 30, 31, 32, |
25339 | ); |
25340 | assert_eq_m512i(r, e); |
25341 | } |
25342 | |
25343 | #[simd_test(enable = "avx512fp16" )] |
25344 | unsafe fn test_mm512_mask_cvtph_epi16() { |
25345 | let a = _mm512_set_ph( |
25346 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25347 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25348 | 31.0, 32.0, |
25349 | ); |
25350 | let src = _mm512_set_epi16( |
25351 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25352 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25353 | ); |
25354 | let r = _mm512_mask_cvtph_epi16(src, 0b01010101010101010101010101010101, a); |
25355 | let e = _mm512_set_epi16( |
25356 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25357 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25358 | ); |
25359 | assert_eq_m512i(r, e); |
25360 | } |
25361 | |
25362 | #[simd_test(enable = "avx512fp16" )] |
25363 | unsafe fn test_mm512_maskz_cvtph_epi16() { |
25364 | let a = _mm512_set_ph( |
25365 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25366 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25367 | 31.0, 32.0, |
25368 | ); |
25369 | let r = _mm512_maskz_cvtph_epi16(0b01010101010101010101010101010101, a); |
25370 | let e = _mm512_set_epi16( |
25371 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25372 | 0, 28, 0, 30, 0, 32, |
25373 | ); |
25374 | assert_eq_m512i(r, e); |
25375 | } |
25376 | |
25377 | #[simd_test(enable = "avx512fp16" )] |
25378 | unsafe fn test_mm512_cvt_roundph_epi16() { |
25379 | let a = _mm512_set_ph( |
25380 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25381 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25382 | 31.0, 32.0, |
25383 | ); |
25384 | let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25385 | let e = _mm512_set_epi16( |
25386 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25387 | 25, 26, 27, 28, 29, 30, 31, 32, |
25388 | ); |
25389 | assert_eq_m512i(r, e); |
25390 | } |
25391 | |
25392 | #[simd_test(enable = "avx512fp16" )] |
25393 | unsafe fn test_mm512_mask_cvt_roundph_epi16() { |
25394 | let a = _mm512_set_ph( |
25395 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25396 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25397 | 31.0, 32.0, |
25398 | ); |
25399 | let src = _mm512_set_epi16( |
25400 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25401 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25402 | ); |
25403 | let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25404 | src, |
25405 | 0b01010101010101010101010101010101, |
25406 | a, |
25407 | ); |
25408 | let e = _mm512_set_epi16( |
25409 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25410 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25411 | ); |
25412 | assert_eq_m512i(r, e); |
25413 | } |
25414 | |
25415 | #[simd_test(enable = "avx512fp16" )] |
25416 | unsafe fn test_mm512_maskz_cvt_roundph_epi16() { |
25417 | let a = _mm512_set_ph( |
25418 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25419 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25420 | 31.0, 32.0, |
25421 | ); |
25422 | let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25423 | 0b01010101010101010101010101010101, |
25424 | a, |
25425 | ); |
25426 | let e = _mm512_set_epi16( |
25427 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25428 | 0, 28, 0, 30, 0, 32, |
25429 | ); |
25430 | assert_eq_m512i(r, e); |
25431 | } |
25432 | |
25433 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25434 | unsafe fn test_mm_cvtph_epu16() { |
25435 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25436 | let r = _mm_cvtph_epu16(a); |
25437 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25438 | assert_eq_m128i(r, e); |
25439 | } |
25440 | |
25441 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25442 | unsafe fn test_mm_mask_cvtph_epu16() { |
25443 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25444 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25445 | let r = _mm_mask_cvtph_epu16(src, 0b01010101, a); |
25446 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25447 | assert_eq_m128i(r, e); |
25448 | } |
25449 | |
25450 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25451 | unsafe fn test_mm_maskz_cvtph_epu16() { |
25452 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25453 | let r = _mm_maskz_cvtph_epu16(0b01010101, a); |
25454 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25455 | assert_eq_m128i(r, e); |
25456 | } |
25457 | |
25458 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25459 | unsafe fn test_mm256_cvtph_epu16() { |
25460 | let a = _mm256_set_ph( |
25461 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25462 | ); |
25463 | let r = _mm256_cvtph_epu16(a); |
25464 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25465 | assert_eq_m256i(r, e); |
25466 | } |
25467 | |
25468 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25469 | unsafe fn test_mm256_mask_cvtph_epu16() { |
25470 | let a = _mm256_set_ph( |
25471 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25472 | ); |
25473 | let src = _mm256_set_epi16( |
25474 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25475 | ); |
25476 | let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a); |
25477 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25478 | assert_eq_m256i(r, e); |
25479 | } |
25480 | |
25481 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25482 | unsafe fn test_mm256_maskz_cvtph_epu16() { |
25483 | let a = _mm256_set_ph( |
25484 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25485 | ); |
25486 | let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a); |
25487 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25488 | assert_eq_m256i(r, e); |
25489 | } |
25490 | |
25491 | #[simd_test(enable = "avx512fp16" )] |
25492 | unsafe fn test_mm512_cvtph_epu16() { |
25493 | let a = _mm512_set_ph( |
25494 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25495 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25496 | 31.0, 32.0, |
25497 | ); |
25498 | let r = _mm512_cvtph_epu16(a); |
25499 | let e = _mm512_set_epi16( |
25500 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25501 | 25, 26, 27, 28, 29, 30, 31, 32, |
25502 | ); |
25503 | assert_eq_m512i(r, e); |
25504 | } |
25505 | |
25506 | #[simd_test(enable = "avx512fp16" )] |
25507 | unsafe fn test_mm512_mask_cvtph_epu16() { |
25508 | let a = _mm512_set_ph( |
25509 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25510 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25511 | 31.0, 32.0, |
25512 | ); |
25513 | let src = _mm512_set_epi16( |
25514 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25515 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25516 | ); |
25517 | let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a); |
25518 | let e = _mm512_set_epi16( |
25519 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25520 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25521 | ); |
25522 | assert_eq_m512i(r, e); |
25523 | } |
25524 | |
25525 | #[simd_test(enable = "avx512fp16" )] |
25526 | unsafe fn test_mm512_maskz_cvtph_epu16() { |
25527 | let a = _mm512_set_ph( |
25528 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25529 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25530 | 31.0, 32.0, |
25531 | ); |
25532 | let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a); |
25533 | let e = _mm512_set_epi16( |
25534 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25535 | 0, 28, 0, 30, 0, 32, |
25536 | ); |
25537 | assert_eq_m512i(r, e); |
25538 | } |
25539 | |
25540 | #[simd_test(enable = "avx512fp16" )] |
25541 | unsafe fn test_mm512_cvt_roundph_epu16() { |
25542 | let a = _mm512_set_ph( |
25543 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25544 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25545 | 31.0, 32.0, |
25546 | ); |
25547 | let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
25548 | let e = _mm512_set_epi16( |
25549 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25550 | 25, 26, 27, 28, 29, 30, 31, 32, |
25551 | ); |
25552 | assert_eq_m512i(r, e); |
25553 | } |
25554 | |
25555 | #[simd_test(enable = "avx512fp16" )] |
25556 | unsafe fn test_mm512_mask_cvt_roundph_epu16() { |
25557 | let a = _mm512_set_ph( |
25558 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25559 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25560 | 31.0, 32.0, |
25561 | ); |
25562 | let src = _mm512_set_epi16( |
25563 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25564 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25565 | ); |
25566 | let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25567 | src, |
25568 | 0b01010101010101010101010101010101, |
25569 | a, |
25570 | ); |
25571 | let e = _mm512_set_epi16( |
25572 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25573 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25574 | ); |
25575 | assert_eq_m512i(r, e); |
25576 | } |
25577 | |
25578 | #[simd_test(enable = "avx512fp16" )] |
25579 | unsafe fn test_mm512_maskz_cvt_roundph_epu16() { |
25580 | let a = _mm512_set_ph( |
25581 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25582 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25583 | 31.0, 32.0, |
25584 | ); |
25585 | let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
25586 | 0b01010101010101010101010101010101, |
25587 | a, |
25588 | ); |
25589 | let e = _mm512_set_epi16( |
25590 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25591 | 0, 28, 0, 30, 0, 32, |
25592 | ); |
25593 | assert_eq_m512i(r, e); |
25594 | } |
25595 | |
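// Truncating (`cvtt`) conversions always round toward zero, so their `_round` forms take
// only a SAE argument such as `_MM_FROUND_NO_EXC` instead of a rounding direction.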
25596 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25597 | unsafe fn test_mm_cvttph_epi16() { |
25598 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25599 | let r = _mm_cvttph_epi16(a); |
25600 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25601 | assert_eq_m128i(r, e); |
25602 | } |
25603 | |
25604 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25605 | unsafe fn test_mm_mask_cvttph_epi16() { |
25606 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25607 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25608 | let r = _mm_mask_cvttph_epi16(src, 0b01010101, a); |
25609 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25610 | assert_eq_m128i(r, e); |
25611 | } |
25612 | |
25613 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25614 | unsafe fn test_mm_maskz_cvttph_epi16() { |
25615 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25616 | let r = _mm_maskz_cvttph_epi16(0b01010101, a); |
25617 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25618 | assert_eq_m128i(r, e); |
25619 | } |
25620 | |
25621 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25622 | unsafe fn test_mm256_cvttph_epi16() { |
25623 | let a = _mm256_set_ph( |
25624 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25625 | ); |
25626 | let r = _mm256_cvttph_epi16(a); |
25627 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25628 | assert_eq_m256i(r, e); |
25629 | } |
25630 | |
25631 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25632 | unsafe fn test_mm256_mask_cvttph_epi16() { |
25633 | let a = _mm256_set_ph( |
25634 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25635 | ); |
25636 | let src = _mm256_set_epi16( |
25637 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25638 | ); |
25639 | let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a); |
25640 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25641 | assert_eq_m256i(r, e); |
25642 | } |
25643 | |
25644 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25645 | unsafe fn test_mm256_maskz_cvttph_epi16() { |
25646 | let a = _mm256_set_ph( |
25647 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25648 | ); |
25649 | let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a); |
25650 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25651 | assert_eq_m256i(r, e); |
25652 | } |
25653 | |
25654 | #[simd_test(enable = "avx512fp16" )] |
25655 | unsafe fn test_mm512_cvttph_epi16() { |
25656 | let a = _mm512_set_ph( |
25657 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25658 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25659 | 31.0, 32.0, |
25660 | ); |
25661 | let r = _mm512_cvttph_epi16(a); |
25662 | let e = _mm512_set_epi16( |
25663 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25664 | 25, 26, 27, 28, 29, 30, 31, 32, |
25665 | ); |
25666 | assert_eq_m512i(r, e); |
25667 | } |
25668 | |
25669 | #[simd_test(enable = "avx512fp16" )] |
25670 | unsafe fn test_mm512_mask_cvttph_epi16() { |
25671 | let a = _mm512_set_ph( |
25672 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25673 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25674 | 31.0, 32.0, |
25675 | ); |
25676 | let src = _mm512_set_epi16( |
25677 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25678 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25679 | ); |
25680 | let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a); |
25681 | let e = _mm512_set_epi16( |
25682 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25683 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25684 | ); |
25685 | assert_eq_m512i(r, e); |
25686 | } |
25687 | |
25688 | #[simd_test(enable = "avx512fp16" )] |
25689 | unsafe fn test_mm512_maskz_cvttph_epi16() { |
25690 | let a = _mm512_set_ph( |
25691 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25692 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25693 | 31.0, 32.0, |
25694 | ); |
25695 | let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a); |
25696 | let e = _mm512_set_epi16( |
25697 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25698 | 0, 28, 0, 30, 0, 32, |
25699 | ); |
25700 | assert_eq_m512i(r, e); |
25701 | } |
25702 | |
25703 | #[simd_test(enable = "avx512fp16" )] |
25704 | unsafe fn test_mm512_cvtt_roundph_epi16() { |
25705 | let a = _mm512_set_ph( |
25706 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25707 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25708 | 31.0, 32.0, |
25709 | ); |
25710 | let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a); |
25711 | let e = _mm512_set_epi16( |
25712 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25713 | 25, 26, 27, 28, 29, 30, 31, 32, |
25714 | ); |
25715 | assert_eq_m512i(r, e); |
25716 | } |
25717 | |
25718 | #[simd_test(enable = "avx512fp16" )] |
25719 | unsafe fn test_mm512_mask_cvtt_roundph_epi16() { |
25720 | let a = _mm512_set_ph( |
25721 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25722 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25723 | 31.0, 32.0, |
25724 | ); |
25725 | let src = _mm512_set_epi16( |
25726 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25727 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25728 | ); |
25729 | let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( |
25730 | src, |
25731 | 0b01010101010101010101010101010101, |
25732 | a, |
25733 | ); |
25734 | let e = _mm512_set_epi16( |
25735 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25736 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25737 | ); |
25738 | assert_eq_m512i(r, e); |
25739 | } |
25740 | |
25741 | #[simd_test(enable = "avx512fp16" )] |
25742 | unsafe fn test_mm512_maskz_cvtt_roundph_epi16() { |
25743 | let a = _mm512_set_ph( |
25744 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25745 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25746 | 31.0, 32.0, |
25747 | ); |
25748 | let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>( |
25749 | 0b01010101010101010101010101010101, |
25750 | a, |
25751 | ); |
25752 | let e = _mm512_set_epi16( |
25753 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25754 | 0, 28, 0, 30, 0, 32, |
25755 | ); |
25756 | assert_eq_m512i(r, e); |
25757 | } |
25758 | |
25759 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25760 | unsafe fn test_mm_cvttph_epu16() { |
25761 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25762 | let r = _mm_cvttph_epu16(a); |
25763 | let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
25764 | assert_eq_m128i(r, e); |
25765 | } |
25766 | |
25767 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25768 | unsafe fn test_mm_mask_cvttph_epu16() { |
25769 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25770 | let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17); |
25771 | let r = _mm_mask_cvttph_epu16(src, 0b01010101, a); |
25772 | let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8); |
25773 | assert_eq_m128i(r, e); |
25774 | } |
25775 | |
25776 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25777 | unsafe fn test_mm_maskz_cvttph_epu16() { |
25778 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25779 | let r = _mm_maskz_cvttph_epu16(0b01010101, a); |
25780 | let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8); |
25781 | assert_eq_m128i(r, e); |
25782 | } |
25783 | |
25784 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25785 | unsafe fn test_mm256_cvttph_epu16() { |
25786 | let a = _mm256_set_ph( |
25787 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25788 | ); |
25789 | let r = _mm256_cvttph_epu16(a); |
25790 | let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25791 | assert_eq_m256i(r, e); |
25792 | } |
25793 | |
25794 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25795 | unsafe fn test_mm256_mask_cvttph_epu16() { |
25796 | let a = _mm256_set_ph( |
25797 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25798 | ); |
25799 | let src = _mm256_set_epi16( |
25800 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25801 | ); |
25802 | let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a); |
25803 | let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25804 | assert_eq_m256i(r, e); |
25805 | } |
25806 | |
25807 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25808 | unsafe fn test_mm256_maskz_cvttph_epu16() { |
25809 | let a = _mm256_set_ph( |
25810 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25811 | ); |
25812 | let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a); |
25813 | let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
25814 | assert_eq_m256i(r, e); |
25815 | } |
25816 | |
25817 | #[simd_test(enable = "avx512fp16" )] |
25818 | unsafe fn test_mm512_cvttph_epu16() { |
25819 | let a = _mm512_set_ph( |
25820 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25821 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25822 | 31.0, 32.0, |
25823 | ); |
25824 | let r = _mm512_cvttph_epu16(a); |
25825 | let e = _mm512_set_epi16( |
25826 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25827 | 25, 26, 27, 28, 29, 30, 31, 32, |
25828 | ); |
25829 | assert_eq_m512i(r, e); |
25830 | } |
25831 | |
25832 | #[simd_test(enable = "avx512fp16" )] |
25833 | unsafe fn test_mm512_mask_cvttph_epu16() { |
25834 | let a = _mm512_set_ph( |
25835 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25836 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25837 | 31.0, 32.0, |
25838 | ); |
25839 | let src = _mm512_set_epi16( |
25840 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25841 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25842 | ); |
25843 | let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a); |
25844 | let e = _mm512_set_epi16( |
25845 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25846 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25847 | ); |
25848 | assert_eq_m512i(r, e); |
25849 | } |
25850 | |
25851 | #[simd_test(enable = "avx512fp16" )] |
25852 | unsafe fn test_mm512_maskz_cvttph_epu16() { |
25853 | let a = _mm512_set_ph( |
25854 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25855 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25856 | 31.0, 32.0, |
25857 | ); |
25858 | let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a); |
25859 | let e = _mm512_set_epi16( |
25860 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25861 | 0, 28, 0, 30, 0, 32, |
25862 | ); |
25863 | assert_eq_m512i(r, e); |
25864 | } |
25865 | |
25866 | #[simd_test(enable = "avx512fp16" )] |
25867 | unsafe fn test_mm512_cvtt_roundph_epu16() { |
25868 | let a = _mm512_set_ph( |
25869 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25870 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25871 | 31.0, 32.0, |
25872 | ); |
25873 | let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a); |
25874 | let e = _mm512_set_epi16( |
25875 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
25876 | 25, 26, 27, 28, 29, 30, 31, 32, |
25877 | ); |
25878 | assert_eq_m512i(r, e); |
25879 | } |
25880 | |
25881 | #[simd_test(enable = "avx512fp16" )] |
25882 | unsafe fn test_mm512_mask_cvtt_roundph_epu16() { |
25883 | let a = _mm512_set_ph( |
25884 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25885 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25886 | 31.0, 32.0, |
25887 | ); |
25888 | let src = _mm512_set_epi16( |
25889 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
25890 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, |
25891 | ); |
25892 | let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( |
25893 | src, |
25894 | 0b01010101010101010101010101010101, |
25895 | a, |
25896 | ); |
25897 | let e = _mm512_set_epi16( |
25898 | 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32, |
25899 | 24, 34, 26, 36, 28, 38, 30, 40, 32, |
25900 | ); |
25901 | assert_eq_m512i(r, e); |
25902 | } |
25903 | |
25904 | #[simd_test(enable = "avx512fp16" )] |
25905 | unsafe fn test_mm512_maskz_cvtt_roundph_epu16() { |
25906 | let a = _mm512_set_ph( |
25907 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25908 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
25909 | 31.0, 32.0, |
25910 | ); |
25911 | let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>( |
25912 | 0b01010101010101010101010101010101, |
25913 | a, |
25914 | ); |
25915 | let e = _mm512_set_epi16( |
25916 | 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, |
25917 | 0, 28, 0, 30, 0, 32, |
25918 | ); |
25919 | assert_eq_m512i(r, e); |
25920 | } |
25921 | |
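// Layout reminder for the masked tests in this block: the `_mm*_set_*` constructors list
// elements from the highest index down to element 0, while mask bit 0 governs element 0.
// With the alternating masks used here, the first argument of every expected vector is
// therefore a `src` value (or zero in the `maskz_` forms) and the converted values follow
// in the even-numbered elements.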
25922 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25923 | unsafe fn test_mm_cvtph_epi32() { |
25924 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25925 | let r = _mm_cvtph_epi32(a); |
25926 | let e = _mm_set_epi32(1, 2, 3, 4); |
25927 | assert_eq_m128i(r, e); |
25928 | } |
25929 | |
25930 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25931 | unsafe fn test_mm_mask_cvtph_epi32() { |
25932 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25933 | let src = _mm_set_epi32(10, 11, 12, 13); |
25934 | let r = _mm_mask_cvtph_epi32(src, 0b0101, a); |
25935 | let e = _mm_set_epi32(10, 2, 12, 4); |
25936 | assert_eq_m128i(r, e); |
25937 | } |
25938 | |
25939 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25940 | unsafe fn test_mm_maskz_cvtph_epi32() { |
25941 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
25942 | let r = _mm_maskz_cvtph_epi32(0b0101, a); |
25943 | let e = _mm_set_epi32(0, 2, 0, 4); |
25944 | assert_eq_m128i(r, e); |
25945 | } |
25946 | |
25947 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25948 | unsafe fn test_mm256_cvtph_epi32() { |
25949 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25950 | let r = _mm256_cvtph_epi32(a); |
25951 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
25952 | assert_eq_m256i(r, e); |
25953 | } |
25954 | |
25955 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25956 | unsafe fn test_mm256_mask_cvtph_epi32() { |
25957 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25958 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
25959 | let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a); |
25960 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
25961 | assert_eq_m256i(r, e); |
25962 | } |
25963 | |
25964 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
25965 | unsafe fn test_mm256_maskz_cvtph_epi32() { |
25966 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
25967 | let r = _mm256_maskz_cvtph_epi32(0b01010101, a); |
25968 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
25969 | assert_eq_m256i(r, e); |
25970 | } |
25971 | |
25972 | #[simd_test(enable = "avx512fp16" )] |
25973 | unsafe fn test_mm512_cvtph_epi32() { |
25974 | let a = _mm256_set_ph( |
25975 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25976 | ); |
25977 | let r = _mm512_cvtph_epi32(a); |
25978 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
25979 | assert_eq_m512i(r, e); |
25980 | } |
25981 | |
25982 | #[simd_test(enable = "avx512fp16" )] |
25983 | unsafe fn test_mm512_mask_cvtph_epi32() { |
25984 | let a = _mm256_set_ph( |
25985 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25986 | ); |
25987 | let src = _mm512_set_epi32( |
25988 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
25989 | ); |
25990 | let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a); |
25991 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
25992 | assert_eq_m512i(r, e); |
25993 | } |
25994 | |
25995 | #[simd_test(enable = "avx512fp16" )] |
25996 | unsafe fn test_mm512_maskz_cvtph_epi32() { |
25997 | let a = _mm256_set_ph( |
25998 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
25999 | ); |
26000 | let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a); |
26001 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26002 | assert_eq_m512i(r, e); |
26003 | } |
26004 | |
26005 | #[simd_test(enable = "avx512fp16" )] |
26006 | unsafe fn test_mm512_cvt_roundph_epi32() { |
26007 | let a = _mm256_set_ph( |
26008 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26009 | ); |
26010 | let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26011 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26012 | assert_eq_m512i(r, e); |
26013 | } |
26014 | |
26015 | #[simd_test(enable = "avx512fp16" )] |
26016 | unsafe fn test_mm512_mask_cvt_roundph_epi32() { |
26017 | let a = _mm256_set_ph( |
26018 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26019 | ); |
26020 | let src = _mm512_set_epi32( |
26021 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26022 | ); |
26023 | let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26024 | src, |
26025 | 0b0101010101010101, |
26026 | a, |
26027 | ); |
26028 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26029 | assert_eq_m512i(r, e); |
26030 | } |
26031 | |
26032 | #[simd_test(enable = "avx512fp16" )] |
26033 | unsafe fn test_mm512_maskz_cvt_roundph_epi32() { |
26034 | let a = _mm256_set_ph( |
26035 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26036 | ); |
26037 | let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26038 | 0b0101010101010101, |
26039 | a, |
26040 | ); |
26041 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26042 | assert_eq_m512i(r, e); |
26043 | } |
26044 | |
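// The scalar conversions below operate on element 0 only. `_mm_setr_ph` lists elements from
// element 0 upward, so the value being converted is the first argument.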
26045 | #[simd_test(enable = "avx512fp16" )] |
26046 | unsafe fn test_mm_cvtsh_i32() { |
26047 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26048 | let r = _mm_cvtsh_i32(a); |
26049 | assert_eq!(r, 1); |
26050 | } |
26051 | |
26052 | #[simd_test(enable = "avx512fp16" )] |
26053 | unsafe fn test_mm_cvt_roundsh_i32() { |
26054 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26055 | let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26056 | assert_eq!(r, 1); |
26057 | } |
26058 | |
26059 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26060 | unsafe fn test_mm_cvtph_epu32() { |
26061 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26062 | let r = _mm_cvtph_epu32(a); |
26063 | let e = _mm_set_epi32(1, 2, 3, 4); |
26064 | assert_eq_m128i(r, e); |
26065 | } |
26066 | |
26067 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26068 | unsafe fn test_mm_mask_cvtph_epu32() { |
26069 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26070 | let src = _mm_set_epi32(10, 11, 12, 13); |
26071 | let r = _mm_mask_cvtph_epu32(src, 0b0101, a); |
26072 | let e = _mm_set_epi32(10, 2, 12, 4); |
26073 | assert_eq_m128i(r, e); |
26074 | } |
26075 | |
26076 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26077 | unsafe fn test_mm_maskz_cvtph_epu32() { |
26078 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26079 | let r = _mm_maskz_cvtph_epu32(0b0101, a); |
26080 | let e = _mm_set_epi32(0, 2, 0, 4); |
26081 | assert_eq_m128i(r, e); |
26082 | } |
26083 | |
26084 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26085 | unsafe fn test_mm256_cvtph_epu32() { |
26086 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26087 | let r = _mm256_cvtph_epu32(a); |
26088 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26089 | assert_eq_m256i(r, e); |
26090 | } |
26091 | |
26092 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26093 | unsafe fn test_mm256_mask_cvtph_epu32() { |
26094 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26095 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26096 | let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a); |
26097 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26098 | assert_eq_m256i(r, e); |
26099 | } |
26100 | |
26101 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26102 | unsafe fn test_mm256_maskz_cvtph_epu32() { |
26103 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26104 | let r = _mm256_maskz_cvtph_epu32(0b01010101, a); |
26105 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26106 | assert_eq_m256i(r, e); |
26107 | } |
26108 | |
26109 | #[simd_test(enable = "avx512fp16" )] |
26110 | unsafe fn test_mm512_cvtph_epu32() { |
26111 | let a = _mm256_set_ph( |
26112 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26113 | ); |
26114 | let r = _mm512_cvtph_epu32(a); |
26115 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26116 | assert_eq_m512i(r, e); |
26117 | } |
26118 | |
26119 | #[simd_test(enable = "avx512fp16" )] |
26120 | unsafe fn test_mm512_mask_cvtph_epu32() { |
26121 | let a = _mm256_set_ph( |
26122 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26123 | ); |
26124 | let src = _mm512_set_epi32( |
26125 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26126 | ); |
26127 | let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a); |
26128 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26129 | assert_eq_m512i(r, e); |
26130 | } |
26131 | |
26132 | #[simd_test(enable = "avx512fp16" )] |
26133 | unsafe fn test_mm512_maskz_cvtph_epu32() { |
26134 | let a = _mm256_set_ph( |
26135 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26136 | ); |
26137 | let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a); |
26138 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26139 | assert_eq_m512i(r, e); |
26140 | } |
26141 | |
26142 | #[simd_test(enable = "avx512fp16" )] |
26143 | unsafe fn test_mm512_cvt_roundph_epu32() { |
26144 | let a = _mm256_set_ph( |
26145 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26146 | ); |
26147 | let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26148 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26149 | assert_eq_m512i(r, e); |
26150 | } |
26151 | |
26152 | #[simd_test(enable = "avx512fp16" )] |
26153 | unsafe fn test_mm512_mask_cvt_roundph_epu32() { |
26154 | let a = _mm256_set_ph( |
26155 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26156 | ); |
26157 | let src = _mm512_set_epi32( |
26158 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26159 | ); |
26160 | let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26161 | src, |
26162 | 0b0101010101010101, |
26163 | a, |
26164 | ); |
26165 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26166 | assert_eq_m512i(r, e); |
26167 | } |
26168 | |
26169 | #[simd_test(enable = "avx512fp16" )] |
26170 | unsafe fn test_mm512_maskz_cvt_roundph_epu32() { |
26171 | let a = _mm256_set_ph( |
26172 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26173 | ); |
26174 | let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26175 | 0b0101010101010101, |
26176 | a, |
26177 | ); |
26178 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26179 | assert_eq_m512i(r, e); |
26180 | } |
26181 | |
26182 | #[simd_test(enable = "avx512fp16" )] |
26183 | unsafe fn test_mm_cvtsh_u32() { |
26184 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26185 | let r = _mm_cvtsh_u32(a); |
26186 | assert_eq!(r, 1); |
26187 | } |
26188 | |
26189 | #[simd_test(enable = "avx512fp16" )] |
26190 | unsafe fn test_mm_cvt_roundsh_u32() { |
26191 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26192 | let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26193 | assert_eq!(r, 1); |
26194 | } |
26195 | |
26196 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26197 | unsafe fn test_mm_cvttph_epi32() { |
26198 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26199 | let r = _mm_cvttph_epi32(a); |
26200 | let e = _mm_set_epi32(1, 2, 3, 4); |
26201 | assert_eq_m128i(r, e); |
26202 | } |
26203 | |
26204 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26205 | unsafe fn test_mm_mask_cvttph_epi32() { |
26206 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26207 | let src = _mm_set_epi32(10, 11, 12, 13); |
26208 | let r = _mm_mask_cvttph_epi32(src, 0b0101, a); |
26209 | let e = _mm_set_epi32(10, 2, 12, 4); |
26210 | assert_eq_m128i(r, e); |
26211 | } |
26212 | |
26213 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26214 | unsafe fn test_mm_maskz_cvttph_epi32() { |
26215 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26216 | let r = _mm_maskz_cvttph_epi32(0b0101, a); |
26217 | let e = _mm_set_epi32(0, 2, 0, 4); |
26218 | assert_eq_m128i(r, e); |
26219 | } |
26220 | |
26221 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26222 | unsafe fn test_mm256_cvttph_epi32() { |
26223 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26224 | let r = _mm256_cvttph_epi32(a); |
26225 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26226 | assert_eq_m256i(r, e); |
26227 | } |
26228 | |
26229 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26230 | unsafe fn test_mm256_mask_cvttph_epi32() { |
26231 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26232 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26233 | let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a); |
26234 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26235 | assert_eq_m256i(r, e); |
26236 | } |
26237 | |
26238 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26239 | unsafe fn test_mm256_maskz_cvttph_epi32() { |
26240 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26241 | let r = _mm256_maskz_cvttph_epi32(0b01010101, a); |
26242 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26243 | assert_eq_m256i(r, e); |
26244 | } |
26245 | |
26246 | #[simd_test(enable = "avx512fp16" )] |
26247 | unsafe fn test_mm512_cvttph_epi32() { |
26248 | let a = _mm256_set_ph( |
26249 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26250 | ); |
26251 | let r = _mm512_cvttph_epi32(a); |
26252 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26253 | assert_eq_m512i(r, e); |
26254 | } |
26255 | |
26256 | #[simd_test(enable = "avx512fp16" )] |
26257 | unsafe fn test_mm512_mask_cvttph_epi32() { |
26258 | let a = _mm256_set_ph( |
26259 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26260 | ); |
26261 | let src = _mm512_set_epi32( |
26262 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26263 | ); |
26264 | let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a); |
26265 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26266 | assert_eq_m512i(r, e); |
26267 | } |
26268 | |
26269 | #[simd_test(enable = "avx512fp16" )] |
26270 | unsafe fn test_mm512_maskz_cvttph_epi32() { |
26271 | let a = _mm256_set_ph( |
26272 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26273 | ); |
26274 | let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a); |
26275 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26276 | assert_eq_m512i(r, e); |
26277 | } |
26278 | |
26279 | #[simd_test(enable = "avx512fp16" )] |
26280 | unsafe fn test_mm512_cvtt_roundph_epi32() { |
26281 | let a = _mm256_set_ph( |
26282 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26283 | ); |
26284 | let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a); |
26285 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26286 | assert_eq_m512i(r, e); |
26287 | } |
26288 | |
26289 | #[simd_test(enable = "avx512fp16" )] |
26290 | unsafe fn test_mm512_mask_cvtt_roundph_epi32() { |
26291 | let a = _mm256_set_ph( |
26292 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26293 | ); |
26294 | let src = _mm512_set_epi32( |
26295 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26296 | ); |
26297 | let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26298 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26299 | assert_eq_m512i(r, e); |
26300 | } |
26301 | |
26302 | #[simd_test(enable = "avx512fp16" )] |
26303 | unsafe fn test_mm512_maskz_cvtt_roundph_epi32() { |
26304 | let a = _mm256_set_ph( |
26305 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26306 | ); |
26307 | let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26308 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26309 | assert_eq_m512i(r, e); |
26310 | } |
26311 | |
26312 | #[simd_test(enable = "avx512fp16" )] |
26313 | unsafe fn test_mm_cvttsh_i32() { |
26314 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26315 | let r = _mm_cvttsh_i32(a); |
26316 | assert_eq!(r, 1); |
26317 | } |
26318 | |
26319 | #[simd_test(enable = "avx512fp16" )] |
26320 | unsafe fn test_mm_cvtt_roundsh_i32() { |
26321 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26322 | let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a); |
26323 | assert_eq!(r, 1); |
26324 | } |
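
// A minimal illustrative sketch, not part of the original suite (the test name is made up):
// on a fractional input the truncating scalar conversion rounds toward zero, while the
// plain conversion follows the default round-to-nearest-even mode.
#[simd_test(enable = "avx512fp16")]
unsafe fn test_cvtsh_vs_cvttsh_fraction_sketch() {
// 1.75 is exactly representable as an f16, so no precision is lost on the way in.
let a = _mm_setr_ph(1.75, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
assert_eq!(_mm_cvttsh_i32(a), 1); // truncation drops the fraction
assert_eq!(_mm_cvtsh_i32(a), 2); // round-to-nearest-even rounds 1.75 up to 2
}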
26325 | |
26326 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26327 | unsafe fn test_mm_cvttph_epu32() { |
26328 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26329 | let r = _mm_cvttph_epu32(a); |
26330 | let e = _mm_set_epi32(1, 2, 3, 4); |
26331 | assert_eq_m128i(r, e); |
26332 | } |
26333 | |
26334 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26335 | unsafe fn test_mm_mask_cvttph_epu32() { |
26336 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26337 | let src = _mm_set_epi32(10, 11, 12, 13); |
26338 | let r = _mm_mask_cvttph_epu32(src, 0b0101, a); |
26339 | let e = _mm_set_epi32(10, 2, 12, 4); |
26340 | assert_eq_m128i(r, e); |
26341 | } |
26342 | |
26343 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26344 | unsafe fn test_mm_maskz_cvttph_epu32() { |
26345 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26346 | let r = _mm_maskz_cvttph_epu32(0b0101, a); |
26347 | let e = _mm_set_epi32(0, 2, 0, 4); |
26348 | assert_eq_m128i(r, e); |
26349 | } |
26350 | |
26351 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26352 | unsafe fn test_mm256_cvttph_epu32() { |
26353 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26354 | let r = _mm256_cvttph_epu32(a); |
26355 | let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
26356 | assert_eq_m256i(r, e); |
26357 | } |
26358 | |
26359 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26360 | unsafe fn test_mm256_mask_cvttph_epu32() { |
26361 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26362 | let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17); |
26363 | let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a); |
26364 | let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8); |
26365 | assert_eq_m256i(r, e); |
26366 | } |
26367 | |
26368 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26369 | unsafe fn test_mm256_maskz_cvttph_epu32() { |
26370 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26371 | let r = _mm256_maskz_cvttph_epu32(0b01010101, a); |
26372 | let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8); |
26373 | assert_eq_m256i(r, e); |
26374 | } |
26375 | |
26376 | #[simd_test(enable = "avx512fp16" )] |
26377 | unsafe fn test_mm512_cvttph_epu32() { |
26378 | let a = _mm256_set_ph( |
26379 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26380 | ); |
26381 | let r = _mm512_cvttph_epu32(a); |
26382 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26383 | assert_eq_m512i(r, e); |
26384 | } |
26385 | |
26386 | #[simd_test(enable = "avx512fp16" )] |
26387 | unsafe fn test_mm512_mask_cvttph_epu32() { |
26388 | let a = _mm256_set_ph( |
26389 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26390 | ); |
26391 | let src = _mm512_set_epi32( |
26392 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26393 | ); |
26394 | let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a); |
26395 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26396 | assert_eq_m512i(r, e); |
26397 | } |
26398 | |
26399 | #[simd_test(enable = "avx512fp16" )] |
26400 | unsafe fn test_mm512_maskz_cvttph_epu32() { |
26401 | let a = _mm256_set_ph( |
26402 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26403 | ); |
26404 | let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a); |
26405 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26406 | assert_eq_m512i(r, e); |
26407 | } |
26408 | |
26409 | #[simd_test(enable = "avx512fp16" )] |
26410 | unsafe fn test_mm512_cvtt_roundph_epu32() { |
26411 | let a = _mm256_set_ph( |
26412 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26413 | ); |
26414 | let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a); |
26415 | let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
26416 | assert_eq_m512i(r, e); |
26417 | } |
26418 | |
26419 | #[simd_test(enable = "avx512fp16" )] |
26420 | unsafe fn test_mm512_mask_cvtt_roundph_epu32() { |
26421 | let a = _mm256_set_ph( |
26422 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26423 | ); |
26424 | let src = _mm512_set_epi32( |
26425 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
26426 | ); |
26427 | let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26428 | let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16); |
26429 | assert_eq_m512i(r, e); |
26430 | } |
26431 | |
26432 | #[simd_test(enable = "avx512fp16" )] |
26433 | unsafe fn test_mm512_maskz_cvtt_roundph_epu32() { |
26434 | let a = _mm256_set_ph( |
26435 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26436 | ); |
26437 | let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26438 | let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16); |
26439 | assert_eq_m512i(r, e); |
26440 | } |
26441 | |
26442 | #[simd_test(enable = "avx512fp16" )] |
26443 | unsafe fn test_mm_cvttsh_u32() { |
26444 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26445 | let r = _mm_cvttsh_u32(a); |
26446 | assert_eq!(r, 1); |
26447 | } |
26448 | |
26449 | #[simd_test(enable = "avx512fp16" )] |
26450 | unsafe fn test_mm_cvtt_roundsh_u32() { |
26451 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26452 | let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a); |
26453 | assert_eq!(r, 1); |
26454 | } |
26455 | |
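// The 64-bit integer conversions widen only the low 2, 4, or 8 half-precision lanes of the
// `__m128h` argument (for 128-, 256-, and 512-bit results respectively); the remaining input
// lanes are ignored, which is why the narrower tests set them to 0.0.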
26456 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26457 | unsafe fn test_mm_cvtph_epi64() { |
26458 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26459 | let r = _mm_cvtph_epi64(a); |
26460 | let e = _mm_set_epi64x(1, 2); |
26461 | assert_eq_m128i(r, e); |
26462 | } |
26463 | |
26464 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26465 | unsafe fn test_mm_mask_cvtph_epi64() { |
26466 | let src = _mm_set_epi64x(3, 4); |
26467 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26468 | let r = _mm_mask_cvtph_epi64(src, 0b01, a); |
26469 | let e = _mm_set_epi64x(3, 2); |
26470 | assert_eq_m128i(r, e); |
26471 | } |
26472 | |
26473 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26474 | unsafe fn test_mm_maskz_cvtph_epi64() { |
26475 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26476 | let r = _mm_maskz_cvtph_epi64(0b01, a); |
26477 | let e = _mm_set_epi64x(0, 2); |
26478 | assert_eq_m128i(r, e); |
26479 | } |
26480 | |
26481 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26482 | unsafe fn test_mm256_cvtph_epi64() { |
26483 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26484 | let r = _mm256_cvtph_epi64(a); |
26485 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26486 | assert_eq_m256i(r, e); |
26487 | } |
26488 | |
26489 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26490 | unsafe fn test_mm256_mask_cvtph_epi64() { |
26491 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26492 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26493 | let r = _mm256_mask_cvtph_epi64(src, 0b0101, a); |
26494 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26495 | assert_eq_m256i(r, e); |
26496 | } |
26497 | |
26498 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26499 | unsafe fn test_mm256_maskz_cvtph_epi64() { |
26500 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26501 | let r = _mm256_maskz_cvtph_epi64(0b0101, a); |
26502 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26503 | assert_eq_m256i(r, e); |
26504 | } |
26505 | |
26506 | #[simd_test(enable = "avx512fp16" )] |
26507 | unsafe fn test_mm512_cvtph_epi64() { |
26508 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26509 | let r = _mm512_cvtph_epi64(a); |
26510 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26511 | assert_eq_m512i(r, e); |
26512 | } |
26513 | |
26514 | #[simd_test(enable = "avx512fp16" )] |
26515 | unsafe fn test_mm512_mask_cvtph_epi64() { |
26516 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26517 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26518 | let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a); |
26519 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26520 | assert_eq_m512i(r, e); |
26521 | } |
26522 | |
26523 | #[simd_test(enable = "avx512fp16" )] |
26524 | unsafe fn test_mm512_maskz_cvtph_epi64() { |
26525 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26526 | let r = _mm512_maskz_cvtph_epi64(0b01010101, a); |
26527 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26528 | assert_eq_m512i(r, e); |
26529 | } |
26530 | |
26531 | #[simd_test(enable = "avx512fp16" )] |
26532 | unsafe fn test_mm512_cvt_roundph_epi64() { |
26533 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26534 | let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26535 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26536 | assert_eq_m512i(r, e); |
26537 | } |
26538 | |
26539 | #[simd_test(enable = "avx512fp16" )] |
26540 | unsafe fn test_mm512_mask_cvt_roundph_epi64() { |
26541 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26542 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26543 | let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26544 | src, 0b01010101, a, |
26545 | ); |
26546 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26547 | assert_eq_m512i(r, e); |
26548 | } |
26549 | |
26550 | #[simd_test(enable = "avx512fp16" )] |
26551 | unsafe fn test_mm512_maskz_cvt_roundph_epi64() { |
26552 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26553 | let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26554 | 0b01010101, a, |
26555 | ); |
26556 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26557 | assert_eq_m512i(r, e); |
26558 | } |
26559 | |
26560 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26561 | unsafe fn test_mm_cvtph_epu64() { |
26562 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26563 | let r = _mm_cvtph_epu64(a); |
26564 | let e = _mm_set_epi64x(1, 2); |
26565 | assert_eq_m128i(r, e); |
26566 | } |
26567 | |
26568 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26569 | unsafe fn test_mm_mask_cvtph_epu64() { |
26570 | let src = _mm_set_epi64x(3, 4); |
26571 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26572 | let r = _mm_mask_cvtph_epu64(src, 0b01, a); |
26573 | let e = _mm_set_epi64x(3, 2); |
26574 | assert_eq_m128i(r, e); |
26575 | } |
26576 | |
26577 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26578 | unsafe fn test_mm_maskz_cvtph_epu64() { |
26579 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26580 | let r = _mm_maskz_cvtph_epu64(0b01, a); |
26581 | let e = _mm_set_epi64x(0, 2); |
26582 | assert_eq_m128i(r, e); |
26583 | } |
26584 | |
26585 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26586 | unsafe fn test_mm256_cvtph_epu64() { |
26587 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26588 | let r = _mm256_cvtph_epu64(a); |
26589 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26590 | assert_eq_m256i(r, e); |
26591 | } |
26592 | |
26593 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26594 | unsafe fn test_mm256_mask_cvtph_epu64() { |
26595 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26596 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26597 | let r = _mm256_mask_cvtph_epu64(src, 0b0101, a); |
26598 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26599 | assert_eq_m256i(r, e); |
26600 | } |
26601 | |
26602 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26603 | unsafe fn test_mm256_maskz_cvtph_epu64() { |
26604 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26605 | let r = _mm256_maskz_cvtph_epu64(0b0101, a); |
26606 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26607 | assert_eq_m256i(r, e); |
26608 | } |
26609 | |
26610 | #[simd_test(enable = "avx512fp16" )] |
26611 | unsafe fn test_mm512_cvtph_epu64() { |
26612 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26613 | let r = _mm512_cvtph_epu64(a); |
26614 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26615 | assert_eq_m512i(r, e); |
26616 | } |
26617 | |
26618 | #[simd_test(enable = "avx512fp16" )] |
26619 | unsafe fn test_mm512_mask_cvtph_epu64() { |
26620 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26621 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26622 | let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a); |
26623 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26624 | assert_eq_m512i(r, e); |
26625 | } |
26626 | |
26627 | #[simd_test(enable = "avx512fp16" )] |
26628 | unsafe fn test_mm512_maskz_cvtph_epu64() { |
26629 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26630 | let r = _mm512_maskz_cvtph_epu64(0b01010101, a); |
26631 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26632 | assert_eq_m512i(r, e); |
26633 | } |
26634 | |
26635 | #[simd_test(enable = "avx512fp16" )] |
26636 | unsafe fn test_mm512_cvt_roundph_epu64() { |
26637 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26638 | let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
26639 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26640 | assert_eq_m512i(r, e); |
26641 | } |
26642 | |
26643 | #[simd_test(enable = "avx512fp16" )] |
26644 | unsafe fn test_mm512_mask_cvt_roundph_epu64() { |
26645 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26646 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26647 | let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26648 | src, 0b01010101, a, |
26649 | ); |
26650 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26651 | assert_eq_m512i(r, e); |
26652 | } |
26653 | |
26654 | #[simd_test(enable = "avx512fp16" )] |
26655 | unsafe fn test_mm512_maskz_cvt_roundph_epu64() { |
26656 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26657 | let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
26658 | 0b01010101, a, |
26659 | ); |
26660 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26661 | assert_eq_m512i(r, e); |
26662 | } |
26663 | |
26664 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26665 | unsafe fn test_mm_cvttph_epi64() { |
26666 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26667 | let r = _mm_cvttph_epi64(a); |
26668 | let e = _mm_set_epi64x(1, 2); |
26669 | assert_eq_m128i(r, e); |
26670 | } |
26671 | |
26672 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26673 | unsafe fn test_mm_mask_cvttph_epi64() { |
26674 | let src = _mm_set_epi64x(3, 4); |
26675 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26676 | let r = _mm_mask_cvttph_epi64(src, 0b01, a); |
26677 | let e = _mm_set_epi64x(3, 2); |
26678 | assert_eq_m128i(r, e); |
26679 | } |
26680 | |
26681 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26682 | unsafe fn test_mm_maskz_cvttph_epi64() { |
26683 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26684 | let r = _mm_maskz_cvttph_epi64(0b01, a); |
26685 | let e = _mm_set_epi64x(0, 2); |
26686 | assert_eq_m128i(r, e); |
26687 | } |
26688 | |
26689 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26690 | unsafe fn test_mm256_cvttph_epi64() { |
26691 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26692 | let r = _mm256_cvttph_epi64(a); |
26693 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26694 | assert_eq_m256i(r, e); |
26695 | } |
26696 | |
26697 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26698 | unsafe fn test_mm256_mask_cvttph_epi64() { |
26699 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26700 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26701 | let r = _mm256_mask_cvttph_epi64(src, 0b0101, a); |
26702 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26703 | assert_eq_m256i(r, e); |
26704 | } |
26705 | |
26706 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26707 | unsafe fn test_mm256_maskz_cvttph_epi64() { |
26708 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26709 | let r = _mm256_maskz_cvttph_epi64(0b0101, a); |
26710 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26711 | assert_eq_m256i(r, e); |
26712 | } |
26713 | |
26714 | #[simd_test(enable = "avx512fp16" )] |
26715 | unsafe fn test_mm512_cvttph_epi64() { |
26716 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26717 | let r = _mm512_cvttph_epi64(a); |
26718 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26719 | assert_eq_m512i(r, e); |
26720 | } |
26721 | |
26722 | #[simd_test(enable = "avx512fp16" )] |
26723 | unsafe fn test_mm512_mask_cvttph_epi64() { |
26724 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26725 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26726 | let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a); |
26727 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26728 | assert_eq_m512i(r, e); |
26729 | } |
26730 | |
26731 | #[simd_test(enable = "avx512fp16" )] |
26732 | unsafe fn test_mm512_maskz_cvttph_epi64() { |
26733 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26734 | let r = _mm512_maskz_cvttph_epi64(0b01010101, a); |
26735 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26736 | assert_eq_m512i(r, e); |
26737 | } |
26738 | |
26739 | #[simd_test(enable = "avx512fp16" )] |
26740 | unsafe fn test_mm512_cvtt_roundph_epi64() { |
26741 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26742 | let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a); |
26743 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26744 | assert_eq_m512i(r, e); |
26745 | } |
26746 | |
26747 | #[simd_test(enable = "avx512fp16" )] |
26748 | unsafe fn test_mm512_mask_cvtt_roundph_epi64() { |
26749 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26750 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26751 | let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
26752 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26753 | assert_eq_m512i(r, e); |
26754 | } |
26755 | |
26756 | #[simd_test(enable = "avx512fp16" )] |
26757 | unsafe fn test_mm512_maskz_cvtt_roundph_epi64() { |
26758 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26759 | let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a); |
26760 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26761 | assert_eq_m512i(r, e); |
26762 | } |
26763 | |
26764 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26765 | unsafe fn test_mm_cvttph_epu64() { |
26766 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26767 | let r = _mm_cvttph_epu64(a); |
26768 | let e = _mm_set_epi64x(1, 2); |
26769 | assert_eq_m128i(r, e); |
26770 | } |
26771 | |
26772 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26773 | unsafe fn test_mm_mask_cvttph_epu64() { |
26774 | let src = _mm_set_epi64x(3, 4); |
26775 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26776 | let r = _mm_mask_cvttph_epu64(src, 0b01, a); |
26777 | let e = _mm_set_epi64x(3, 2); |
26778 | assert_eq_m128i(r, e); |
26779 | } |
26780 | |
26781 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26782 | unsafe fn test_mm_maskz_cvttph_epu64() { |
26783 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
26784 | let r = _mm_maskz_cvttph_epu64(0b01, a); |
26785 | let e = _mm_set_epi64x(0, 2); |
26786 | assert_eq_m128i(r, e); |
26787 | } |
26788 | |
26789 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26790 | unsafe fn test_mm256_cvttph_epu64() { |
26791 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26792 | let r = _mm256_cvttph_epu64(a); |
26793 | let e = _mm256_set_epi64x(1, 2, 3, 4); |
26794 | assert_eq_m256i(r, e); |
26795 | } |
26796 | |
26797 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26798 | unsafe fn test_mm256_mask_cvttph_epu64() { |
26799 | let src = _mm256_set_epi64x(5, 6, 7, 8); |
26800 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26801 | let r = _mm256_mask_cvttph_epu64(src, 0b0101, a); |
26802 | let e = _mm256_set_epi64x(5, 2, 7, 4); |
26803 | assert_eq_m256i(r, e); |
26804 | } |
26805 | |
26806 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26807 | unsafe fn test_mm256_maskz_cvttph_epu64() { |
26808 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26809 | let r = _mm256_maskz_cvttph_epu64(0b0101, a); |
26810 | let e = _mm256_set_epi64x(0, 2, 0, 4); |
26811 | assert_eq_m256i(r, e); |
26812 | } |
26813 | |
26814 | #[simd_test(enable = "avx512fp16" )] |
26815 | unsafe fn test_mm512_cvttph_epu64() { |
26816 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26817 | let r = _mm512_cvttph_epu64(a); |
26818 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26819 | assert_eq_m512i(r, e); |
26820 | } |
26821 | |
26822 | #[simd_test(enable = "avx512fp16" )] |
26823 | unsafe fn test_mm512_mask_cvttph_epu64() { |
26824 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26825 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26826 | let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a); |
26827 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26828 | assert_eq_m512i(r, e); |
26829 | } |
26830 | |
26831 | #[simd_test(enable = "avx512fp16" )] |
26832 | unsafe fn test_mm512_maskz_cvttph_epu64() { |
26833 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26834 | let r = _mm512_maskz_cvttph_epu64(0b01010101, a); |
26835 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26836 | assert_eq_m512i(r, e); |
26837 | } |
26838 | |
26839 | #[simd_test(enable = "avx512fp16" )] |
26840 | unsafe fn test_mm512_cvtt_roundph_epu64() { |
26841 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26842 | let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a); |
26843 | let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
26844 | assert_eq_m512i(r, e); |
26845 | } |
26846 | |
26847 | #[simd_test(enable = "avx512fp16" )] |
26848 | unsafe fn test_mm512_mask_cvtt_roundph_epu64() { |
26849 | let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
26850 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26851 | let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
26852 | let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8); |
26853 | assert_eq_m512i(r, e); |
26854 | } |
26855 | |
26856 | #[simd_test(enable = "avx512fp16" )] |
26857 | unsafe fn test_mm512_maskz_cvtt_roundph_epu64() { |
26858 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26859 | let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a); |
26860 | let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8); |
26861 | assert_eq_m512i(r, e); |
26862 | } |
26863 | |
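// `cvtxph_ps` widens half precision to single precision. The `x` in the name distinguishes
// these `__m128h`-based AVX512-FP16 intrinsics from the earlier F16C `_mm_cvtph_ps`, which
// takes the raw 16-bit patterns in a `__m128i`.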
26864 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26865 | unsafe fn test_mm_cvtxph_ps() { |
26866 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26867 | let r = _mm_cvtxph_ps(a); |
26868 | let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0); |
26869 | assert_eq_m128(r, e); |
26870 | } |
26871 | |
26872 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26873 | unsafe fn test_mm_mask_cvtxph_ps() { |
26874 | let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0); |
26875 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26876 | let r = _mm_mask_cvtxph_ps(src, 0b0101, a); |
26877 | let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0); |
26878 | assert_eq_m128(r, e); |
26879 | } |
26880 | |
26881 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26882 | unsafe fn test_mm_maskz_cvtxph_ps() { |
26883 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
26884 | let r = _mm_maskz_cvtxph_ps(0b0101, a); |
26885 | let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0); |
26886 | assert_eq_m128(r, e); |
26887 | } |
26888 | |
26889 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26890 | unsafe fn test_mm256_cvtxph_ps() { |
26891 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26892 | let r = _mm256_cvtxph_ps(a); |
26893 | let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26894 | assert_eq_m256(r, e); |
26895 | } |
26896 | |
26897 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26898 | unsafe fn test_mm256_mask_cvtxph_ps() { |
26899 | let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
26900 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26901 | let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a); |
26902 | let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
26903 | assert_eq_m256(r, e); |
26904 | } |
26905 | |
26906 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
26907 | unsafe fn test_mm256_maskz_cvtxph_ps() { |
26908 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
26909 | let r = _mm256_maskz_cvtxph_ps(0b01010101, a); |
26910 | let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
26911 | assert_eq_m256(r, e); |
26912 | } |
26913 | |
26914 | #[simd_test(enable = "avx512fp16" )] |
26915 | unsafe fn test_mm512_cvtxph_ps() { |
26916 | let a = _mm256_set_ph( |
26917 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26918 | ); |
26919 | let r = _mm512_cvtxph_ps(a); |
26920 | let e = _mm512_set_ps( |
26921 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26922 | ); |
26923 | assert_eq_m512(r, e); |
26924 | } |
26925 | |
26926 | #[simd_test(enable = "avx512fp16" )] |
26927 | unsafe fn test_mm512_mask_cvtxph_ps() { |
26928 | let src = _mm512_set_ps( |
26929 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, |
26930 | 24.0, 25.0, |
26931 | ); |
26932 | let a = _mm256_set_ph( |
26933 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26934 | ); |
26935 | let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a); |
26936 | let e = _mm512_set_ps( |
26937 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
26938 | 16.0, |
26939 | ); |
26940 | assert_eq_m512(r, e); |
26941 | } |
26942 | |
26943 | #[simd_test(enable = "avx512fp16" )] |
26944 | unsafe fn test_mm512_maskz_cvtxph_ps() { |
26945 | let a = _mm256_set_ph( |
26946 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26947 | ); |
26948 | let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a); |
26949 | let e = _mm512_set_ps( |
26950 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
26951 | ); |
26952 | assert_eq_m512(r, e); |
26953 | } |
26954 | |
26955 | #[simd_test(enable = "avx512fp16" )] |
26956 | unsafe fn test_mm512_cvtx_roundph_ps() { |
26957 | let a = _mm256_set_ph( |
26958 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26959 | ); |
26960 | let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a); |
26961 | let e = _mm512_set_ps( |
26962 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26963 | ); |
26964 | assert_eq_m512(r, e); |
26965 | } |
26966 | |
26967 | #[simd_test(enable = "avx512fp16" )] |
26968 | unsafe fn test_mm512_mask_cvtx_roundph_ps() { |
26969 | let src = _mm512_set_ps( |
26970 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, |
26971 | 24.0, 25.0, |
26972 | ); |
26973 | let a = _mm256_set_ph( |
26974 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26975 | ); |
26976 | let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a); |
26977 | let e = _mm512_set_ps( |
26978 | 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0, |
26979 | 16.0, |
26980 | ); |
26981 | assert_eq_m512(r, e); |
26982 | } |
26983 | |
26984 | #[simd_test(enable = "avx512fp16" )] |
26985 | unsafe fn test_mm512_maskz_cvtx_roundph_ps() { |
26986 | let a = _mm256_set_ph( |
26987 | 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
26988 | ); |
26989 | let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a); |
26990 | let e = _mm512_set_ps( |
26991 | 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0, |
26992 | ); |
26993 | assert_eq_m512(r, e); |
26994 | } |
26995 | |
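// The scalar `cvtsh_ss` family converts the lowest f16 lane of `b` into lane 0 of the result
// and copies the upper three f32 lanes from `a`; in the masked forms lane 0 falls back to
// `src` (or zero) when the single mask bit is clear.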
26996 | #[simd_test(enable = "avx512fp16" )] |
26997 | unsafe fn test_mm_cvtsh_ss() { |
26998 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
26999 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27000 | let r = _mm_cvtsh_ss(a, b); |
27001 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27002 | assert_eq_m128(r, e); |
27003 | } |
27004 | |
27005 | #[simd_test(enable = "avx512fp16" )] |
27006 | unsafe fn test_mm_mask_cvtsh_ss() { |
27007 | let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); |
27008 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27009 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27010 | let r = _mm_mask_cvtsh_ss(src, 0, a, b); |
27011 | let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); |
27012 | assert_eq_m128(r, e); |
27013 | let r = _mm_mask_cvtsh_ss(src, 1, a, b); |
27014 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27015 | assert_eq_m128(r, e); |
27016 | } |
27017 | |
27018 | #[simd_test(enable = "avx512fp16" )] |
27019 | unsafe fn test_mm_maskz_cvtsh_ss() { |
27020 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27021 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27022 | let r = _mm_maskz_cvtsh_ss(0, a, b); |
27023 | let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); |
27024 | assert_eq_m128(r, e); |
27025 | let r = _mm_maskz_cvtsh_ss(1, a, b); |
27026 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27027 | assert_eq_m128(r, e); |
27028 | } |
27029 | |
27030 | #[simd_test(enable = "avx512fp16" )] |
27031 | unsafe fn test_mm_cvt_roundsh_ss() { |
27032 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27033 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27034 | let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b); |
27035 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27036 | assert_eq_m128(r, e); |
27037 | } |
27038 | |
27039 | #[simd_test(enable = "avx512fp16" )] |
27040 | unsafe fn test_mm_mask_cvt_roundsh_ss() { |
27041 | let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0); |
27042 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27043 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27044 | let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
27045 | let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0); |
27046 | assert_eq_m128(r, e); |
27047 | let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
27048 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27049 | assert_eq_m128(r, e); |
27050 | } |
27051 | |
27052 | #[simd_test(enable = "avx512fp16" )] |
27053 | unsafe fn test_mm_maskz_cvt_roundsh_ss() { |
27054 | let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0); |
27055 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27056 | let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b); |
27057 | let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0); |
27058 | assert_eq_m128(r, e); |
27059 | let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b); |
27060 | let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0); |
27061 | assert_eq_m128(r, e); |
27062 | } |
27063 | |
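// As with the 64-bit integer conversions above, only the low 2, 4, or 8 half-precision lanes
// widen to f64 here; the unused input lanes are set to 0.0.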
27064 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27065 | unsafe fn test_mm_cvtph_pd() { |
27066 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27067 | let r = _mm_cvtph_pd(a); |
27068 | let e = _mm_set_pd(1.0, 2.0); |
27069 | assert_eq_m128d(r, e); |
27070 | } |
27071 | |
27072 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27073 | unsafe fn test_mm_mask_cvtph_pd() { |
27074 | let src = _mm_set_pd(10.0, 11.0); |
27075 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27076 | let r = _mm_mask_cvtph_pd(src, 0b01, a); |
27077 | let e = _mm_set_pd(10.0, 2.0); |
27078 | assert_eq_m128d(r, e); |
27079 | } |
27080 | |
27081 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27082 | unsafe fn test_mm_maskz_cvtph_pd() { |
27083 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); |
27084 | let r = _mm_maskz_cvtph_pd(0b01, a); |
27085 | let e = _mm_set_pd(0.0, 2.0); |
27086 | assert_eq_m128d(r, e); |
27087 | } |
27088 | |
27089 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27090 | unsafe fn test_mm256_cvtph_pd() { |
27091 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27092 | let r = _mm256_cvtph_pd(a); |
27093 | let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); |
27094 | assert_eq_m256d(r, e); |
27095 | } |
27096 | |
27097 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27098 | unsafe fn test_mm256_mask_cvtph_pd() { |
27099 | let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0); |
27100 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27101 | let r = _mm256_mask_cvtph_pd(src, 0b0101, a); |
27102 | let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0); |
27103 | assert_eq_m256d(r, e); |
27104 | } |
27105 | |
27106 | #[simd_test(enable = "avx512fp16,avx512vl" )] |
27107 | unsafe fn test_mm256_maskz_cvtph_pd() { |
27108 | let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); |
27109 | let r = _mm256_maskz_cvtph_pd(0b0101, a); |
27110 | let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0); |
27111 | assert_eq_m256d(r, e); |
27112 | } |
27113 | |
27114 | #[simd_test(enable = "avx512fp16" )] |
27115 | unsafe fn test_mm512_cvtph_pd() { |
27116 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27117 | let r = _mm512_cvtph_pd(a); |
27118 | let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27119 | assert_eq_m512d(r, e); |
27120 | } |
27121 | |
27122 | #[simd_test(enable = "avx512fp16" )] |
27123 | unsafe fn test_mm512_mask_cvtph_pd() { |
27124 | let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
27125 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27126 | let r = _mm512_mask_cvtph_pd(src, 0b01010101, a); |
27127 | let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
27128 | assert_eq_m512d(r, e); |
27129 | } |
27130 | |
27131 | #[simd_test(enable = "avx512fp16" )] |
27132 | unsafe fn test_mm512_maskz_cvtph_pd() { |
27133 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27134 | let r = _mm512_maskz_cvtph_pd(0b01010101, a); |
27135 | let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
27136 | assert_eq_m512d(r, e); |
27137 | } |
27138 | |
27139 | #[simd_test(enable = "avx512fp16" )] |
27140 | unsafe fn test_mm512_cvt_roundph_pd() { |
27141 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27142 | let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a); |
27143 | let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27144 | assert_eq_m512d(r, e); |
27145 | } |
27146 | |
27147 | #[simd_test(enable = "avx512fp16" )] |
27148 | unsafe fn test_mm512_mask_cvt_roundph_pd() { |
27149 | let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); |
27150 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27151 | let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); |
27152 | let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0); |
27153 | assert_eq_m512d(r, e); |
27154 | } |
27155 | |
27156 | #[simd_test(enable = "avx512fp16" )] |
27157 | unsafe fn test_mm512_maskz_cvt_roundph_pd() { |
27158 | let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); |
27159 | let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a); |
27160 | let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0); |
27161 | assert_eq_m512d(r, e); |
27162 | } |
27163 | |
27164 | #[simd_test(enable = "avx512fp16" )] |
27165 | unsafe fn test_mm_cvtsh_sd() { |
27166 | let a = _mm_setr_pd(2.0, 20.0); |
27167 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27168 | let r = _mm_cvtsh_sd(a, b); |
27169 | let e = _mm_setr_pd(1.0, 20.0); |
27170 | assert_eq_m128d(r, e); |
27171 | } |
27172 | |
27173 | #[simd_test(enable = "avx512fp16" )] |
27174 | unsafe fn test_mm_mask_cvtsh_sd() { |
27175 | let src = _mm_setr_pd(3.0, 11.0); |
27176 | let a = _mm_setr_pd(2.0, 20.0); |
27177 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27178 | let r = _mm_mask_cvtsh_sd(src, 0, a, b); |
27179 | let e = _mm_setr_pd(3.0, 20.0); |
27180 | assert_eq_m128d(r, e); |
27181 | let r = _mm_mask_cvtsh_sd(src, 1, a, b); |
27182 | let e = _mm_setr_pd(1.0, 20.0); |
27183 | assert_eq_m128d(r, e); |
27184 | } |
27185 | |
27186 | #[simd_test(enable = "avx512fp16" )] |
27187 | unsafe fn test_mm_maskz_cvtsh_sd() { |
27188 | let a = _mm_setr_pd(2.0, 20.0); |
27189 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27190 | let r = _mm_maskz_cvtsh_sd(0, a, b); |
27191 | let e = _mm_setr_pd(0.0, 20.0); |
27192 | assert_eq_m128d(r, e); |
27193 | let r = _mm_maskz_cvtsh_sd(1, a, b); |
27194 | let e = _mm_setr_pd(1.0, 20.0); |
27195 | assert_eq_m128d(r, e); |
27196 | } |
27197 | |
27198 | #[simd_test(enable = "avx512fp16" )] |
27199 | unsafe fn test_mm_cvt_roundsh_sd() { |
27200 | let a = _mm_setr_pd(2.0, 20.0); |
27201 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27202 | let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b); |
27203 | let e = _mm_setr_pd(1.0, 20.0); |
27204 | assert_eq_m128d(r, e); |
27205 | } |
27206 | |
27207 | #[simd_test(enable = "avx512fp16" )] |
27208 | unsafe fn test_mm_mask_cvt_roundsh_sd() { |
27209 | let src = _mm_setr_pd(3.0, 11.0); |
27210 | let a = _mm_setr_pd(2.0, 20.0); |
27211 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27212 | let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b); |
27213 | let e = _mm_setr_pd(3.0, 20.0); |
27214 | assert_eq_m128d(r, e); |
27215 | let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b); |
27216 | let e = _mm_setr_pd(1.0, 20.0); |
27217 | assert_eq_m128d(r, e); |
27218 | } |
27219 | |
27220 | #[simd_test(enable = "avx512fp16" )] |
27221 | unsafe fn test_mm_maskz_cvt_roundsh_sd() { |
27222 | let a = _mm_setr_pd(2.0, 20.0); |
27223 | let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0); |
27224 | let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b); |
27225 | let e = _mm_setr_pd(0.0, 20.0); |
27226 | assert_eq_m128d(r, e); |
27227 | let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b); |
27228 | let e = _mm_setr_pd(1.0, 20.0); |
27229 | assert_eq_m128d(r, e); |
27230 | } |
27231 | |
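// `_mm_cvtsh_h` and the wider variants simply return element 0 of the vector as an `f16`.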
27232 | #[simd_test(enable = "avx512fp16" )] |
27233 | unsafe fn test_mm_cvtsh_h() { |
27234 | let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0); |
27235 | let r = _mm_cvtsh_h(a); |
27236 | assert_eq!(r, 1.0); |
27237 | } |
27238 | |
27239 | #[simd_test(enable = "avx512fp16" )] |
27240 | unsafe fn test_mm256_cvtsh_h() { |
27241 | let a = _mm256_setr_ph( |
27242 | 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
27243 | ); |
27244 | let r = _mm256_cvtsh_h(a); |
27245 | assert_eq!(r, 1.0); |
27246 | } |
27247 | |
27248 | #[simd_test(enable = "avx512fp16" )] |
27249 | unsafe fn test_mm512_cvtsh_h() { |
27250 | let a = _mm512_setr_ph( |
27251 | 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, |
27252 | 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, |
27253 | 31.0, 32.0, |
27254 | ); |
27255 | let r = _mm512_cvtsh_h(a); |
27256 | assert_eq!(r, 1.0); |
27257 | } |
27258 | |
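// These last two helpers move the lowest 16-bit integer lane between a scalar and a `__m128i`;
// they are tested here because they are gated on the same `avx512fp16` feature in this module.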
27259 | #[simd_test(enable = "avx512fp16" )] |
27260 | unsafe fn test_mm_cvtsi128_si16() { |
27261 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
27262 | let r = _mm_cvtsi128_si16(a); |
27263 | assert_eq!(r, 1); |
27264 | } |
27265 | |
27266 | #[simd_test(enable = "avx512fp16" )] |
27267 | unsafe fn test_mm_cvtsi16_si128() { |
27268 | let a = 1; |
27269 | let r = _mm_cvtsi16_si128(a); |
27270 | let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); |
27271 | assert_eq_m128i(r, e); |
27272 | } |
27273 | } |
27274 | |