avx512fp16.rs source code [crates/core_arch/src/x86/avx512fp16.rs]

1	use crate::arch::asm;
2	use crate::core_arch::{simd::, x86::};
3	use crate::intrinsics::{fmaf16, simd::*};
4	use crate::ptr;
5
6	/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
7	///
8	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
9	#[inline]
10	#[target_feature(enable = "avx512fp16")]
11	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12	pub fn _mm_set_ph(
13	e7: f16,
14	e6: f16,
15	e5: f16,
16	e4: f16,
17	e3: f16,
18	e2: f16,
19	e1: f16,
20	e0: f16,
21	) -> __m128h {
22	__m128h([e0, e1, e2, e3, e4, e5, e6, e7])
23	}
24
25	/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
26	///
27	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
28	#[inline]
29	#[target_feature(enable = "avx512fp16")]
30	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
31	pub fn _mm256_set_ph(
32	e15: f16,
33	e14: f16,
34	e13: f16,
35	e12: f16,
36	e11: f16,
37	e10: f16,
38	e9: f16,
39	e8: f16,
40	e7: f16,
41	e6: f16,
42	e5: f16,
43	e4: f16,
44	e3: f16,
45	e2: f16,
46	e1: f16,
47	e0: f16,
48	) -> __m256h {
49	__m256h([
50	e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
51	])
52	}
53
54	/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
55	///
56	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
57	#[inline]
58	#[target_feature(enable = "avx512fp16")]
59	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
60	pub fn _mm512_set_ph(
61	e31: f16,
62	e30: f16,
63	e29: f16,
64	e28: f16,
65	e27: f16,
66	e26: f16,
67	e25: f16,
68	e24: f16,
69	e23: f16,
70	e22: f16,
71	e21: f16,
72	e20: f16,
73	e19: f16,
74	e18: f16,
75	e17: f16,
76	e16: f16,
77	e15: f16,
78	e14: f16,
79	e13: f16,
80	e12: f16,
81	e11: f16,
82	e10: f16,
83	e9: f16,
84	e8: f16,
85	e7: f16,
86	e6: f16,
87	e5: f16,
88	e4: f16,
89	e3: f16,
90	e2: f16,
91	e1: f16,
92	e0: f16,
93	) -> __m512h {
94	__m512h([
95	e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
96	e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
97	])
98	}
99
100	/// Copy half-precision (16-bit) floating-point elements from a to the lower element of dst and zero
101	/// the upper 7 elements.
102	///
103	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
104	#[inline]
105	#[target_feature(enable = "avx512fp16")]
106	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
107	pub fn _mm_set_sh(a: f16) -> __m128h {
108	__m128h([a, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`])
109	}
110
111	/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
112	///
113	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
114	#[inline]
115	#[target_feature(enable = "avx512fp16")]
116	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
117	pub fn _mm_set1_ph(a: f16) -> __m128h {
118	unsafe { transmute(src:f16x8::splat(a)) }
119	}
120
121	/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
122	///
123	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
124	#[inline]
125	#[target_feature(enable = "avx512fp16")]
126	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
127	pub fn _mm256_set1_ph(a: f16) -> __m256h {
128	unsafe { transmute(src:f16x16::splat(a)) }
129	}
130
131	/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
132	///
133	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
134	#[inline]
135	#[target_feature(enable = "avx512fp16")]
136	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
137	pub fn _mm512_set1_ph(a: f16) -> __m512h {
138	unsafe { transmute(src:f16x32::splat(a)) }
139	}
140
141	/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
142	///
143	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
144	#[inline]
145	#[target_feature(enable = "avx512fp16")]
146	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
147	pub fn _mm_setr_ph(
148	e0: f16,
149	e1: f16,
150	e2: f16,
151	e3: f16,
152	e4: f16,
153	e5: f16,
154	e6: f16,
155	e7: f16,
156	) -> __m128h {
157	__m128h([e0, e1, e2, e3, e4, e5, e6, e7])
158	}
159
160	/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
161	///
162	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
163	#[inline]
164	#[target_feature(enable = "avx512fp16")]
165	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
166	pub fn _mm256_setr_ph(
167	e0: f16,
168	e1: f16,
169	e2: f16,
170	e3: f16,
171	e4: f16,
172	e5: f16,
173	e6: f16,
174	e7: f16,
175	e8: f16,
176	e9: f16,
177	e10: f16,
178	e11: f16,
179	e12: f16,
180	e13: f16,
181	e14: f16,
182	e15: f16,
183	) -> __m256h {
184	__m256h([
185	e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
186	])
187	}
188
189	/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
190	///
191	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
192	#[inline]
193	#[target_feature(enable = "avx512fp16")]
194	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
195	pub fn _mm512_setr_ph(
196	e0: f16,
197	e1: f16,
198	e2: f16,
199	e3: f16,
200	e4: f16,
201	e5: f16,
202	e6: f16,
203	e7: f16,
204	e8: f16,
205	e9: f16,
206	e10: f16,
207	e11: f16,
208	e12: f16,
209	e13: f16,
210	e14: f16,
211	e15: f16,
212	e16: f16,
213	e17: f16,
214	e18: f16,
215	e19: f16,
216	e20: f16,
217	e21: f16,
218	e22: f16,
219	e23: f16,
220	e24: f16,
221	e25: f16,
222	e26: f16,
223	e27: f16,
224	e28: f16,
225	e29: f16,
226	e30: f16,
227	e31: f16,
228	) -> __m512h {
229	__m512h([
230	e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
231	e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
232	])
233	}
234
235	/// Return vector of type __m128h with all elements set to zero.
236	///
237	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
238	#[inline]
239	#[target_feature(enable = "avx512fp16,avx512vl")]
240	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
241	pub fn _mm_setzero_ph() -> __m128h {
242	unsafe { transmute(src:f16x8::ZERO) }
243	}
244
245	/// Return vector of type __m256h with all elements set to zero.
246	///
247	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
248	#[inline]
249	#[target_feature(enable = "avx512fp16,avx512vl")]
250	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
251	pub fn _mm256_setzero_ph() -> __m256h {
252	f16x16::ZERO.as_m256h()
253	}
254
255	/// Return vector of type __m512h with all elements set to zero.
256	///
257	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
258	#[inline]
259	#[target_feature(enable = "avx512fp16")]
260	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
261	pub fn _mm512_setzero_ph() -> __m512h {
262	f16x32::ZERO.as_m512h()
263	}
264
265	/// Return vector of type `__m128h` with indetermination elements.
266	/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
267	/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
268	/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
269	///
270	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
271	#[inline]
272	#[target_feature(enable = "avx512fp16,avx512vl")]
273	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
274	pub fn _mm_undefined_ph() -> __m128h {
275	f16x8::ZERO.as_m128h()
276	}
277
278	/// Return vector of type `__m256h` with indetermination elements.
279	/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
280	/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
281	/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
282	///
283	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
284	#[inline]
285	#[target_feature(enable = "avx512fp16,avx512vl")]
286	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
287	pub fn _mm256_undefined_ph() -> __m256h {
288	f16x16::ZERO.as_m256h()
289	}
290
291	/// Return vector of type `__m512h` with indetermination elements.
292	/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
293	/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
294	/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
295	///
296	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
297	#[inline]
298	#[target_feature(enable = "avx512fp16")]
299	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
300	pub fn _mm512_undefined_ph() -> __m512h {
301	f16x32::ZERO.as_m512h()
302	}
303
304	/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
305	/// does not generate any instructions, thus it has zero latency.
306	///
307	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
308	#[inline]
309	#[target_feature(enable = "avx512fp16")]
310	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
311	pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
312	unsafe { transmute(src:a) }
313	}
314
315	/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
316	/// does not generate any instructions, thus it has zero latency.
317	///
318	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
319	#[inline]
320	#[target_feature(enable = "avx512fp16")]
321	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
322	pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
323	unsafe { transmute(src:a) }
324	}
325
326	/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
327	/// does not generate any instructions, thus it has zero latency.
328	///
329	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
330	#[inline]
331	#[target_feature(enable = "avx512fp16")]
332	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
333	pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
334	unsafe { transmute(src:a) }
335	}
336
337	/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
338	/// does not generate any instructions, thus it has zero latency.
339	///
340	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
341	#[inline]
342	#[target_feature(enable = "avx512fp16")]
343	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
344	pub fn _mm_castph_pd(a: __m128h) -> __m128d {
345	unsafe { transmute(src:a) }
346	}
347
348	/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
349	/// does not generate any instructions, thus it has zero latency.
350	///
351	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
352	#[inline]
353	#[target_feature(enable = "avx512fp16")]
354	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
355	pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
356	unsafe { transmute(src:a) }
357	}
358
359	/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
360	/// does not generate any instructions, thus it has zero latency.
361	///
362	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
363	#[inline]
364	#[target_feature(enable = "avx512fp16")]
365	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
366	pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
367	unsafe { transmute(src:a) }
368	}
369
370	/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
371	/// does not generate any instructions, thus it has zero latency.
372	///
373	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
374	#[inline]
375	#[target_feature(enable = "avx512fp16")]
376	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
377	pub fn _mm_castps_ph(a: __m128) -> __m128h {
378	unsafe { transmute(src:a) }
379	}
380
381	/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
382	/// does not generate any instructions, thus it has zero latency.
383	///
384	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
385	#[inline]
386	#[target_feature(enable = "avx512fp16")]
387	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
388	pub fn _mm256_castps_ph(a: __m256) -> __m256h {
389	unsafe { transmute(src:a) }
390	}
391
392	/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
393	/// does not generate any instructions, thus it has zero latency.
394	///
395	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
396	#[inline]
397	#[target_feature(enable = "avx512fp16")]
398	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
399	pub fn _mm512_castps_ph(a: __m512) -> __m512h {
400	unsafe { transmute(src:a) }
401	}
402
403	/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
404	/// does not generate any instructions, thus it has zero latency.
405	///
406	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
407	#[inline]
408	#[target_feature(enable = "avx512fp16")]
409	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
410	pub fn _mm_castph_ps(a: __m128h) -> __m128 {
411	unsafe { transmute(src:a) }
412	}
413
414	/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
415	/// does not generate any instructions, thus it has zero latency.
416	///
417	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
418	#[inline]
419	#[target_feature(enable = "avx512fp16")]
420	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
421	pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
422	unsafe { transmute(src:a) }
423	}
424
425	/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
426	/// does not generate any instructions, thus it has zero latency.
427	///
428	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
429	#[inline]
430	#[target_feature(enable = "avx512fp16")]
431	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
432	pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
433	unsafe { transmute(src:a) }
434	}
435
436	/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
437	/// does not generate any instructions, thus it has zero latency.
438	///
439	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
440	#[inline]
441	#[target_feature(enable = "avx512fp16")]
442	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
443	pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
444	unsafe { transmute(src:a) }
445	}
446
447	/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
448	/// does not generate any instructions, thus it has zero latency.
449	///
450	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
451	#[inline]
452	#[target_feature(enable = "avx512fp16")]
453	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
454	pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
455	unsafe { transmute(src:a) }
456	}
457
458	/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
459	/// does not generate any instructions, thus it has zero latency.
460	///
461	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
462	#[inline]
463	#[target_feature(enable = "avx512fp16")]
464	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
465	pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
466	unsafe { transmute(src:a) }
467	}
468
469	/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
470	/// does not generate any instructions, thus it has zero latency.
471	///
472	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
473	#[inline]
474	#[target_feature(enable = "avx512fp16")]
475	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
476	pub fn _mm_castph_si128(a: __m128h) -> __m128i {
477	unsafe { transmute(src:a) }
478	}
479
480	/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
481	/// does not generate any instructions, thus it has zero latency.
482	///
483	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
484	#[inline]
485	#[target_feature(enable = "avx512fp16")]
486	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
487	pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
488	unsafe { transmute(src:a) }
489	}
490
491	/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
492	/// does not generate any instructions, thus it has zero latency.
493	///
494	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
495	#[inline]
496	#[target_feature(enable = "avx512fp16")]
497	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
498	pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
499	unsafe { transmute(src:a) }
500	}
501
502	/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
503	/// does not generate any instructions, thus it has zero latency.
504	///
505	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
506	#[inline]
507	#[target_feature(enable = "avx512fp16")]
508	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
509	pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
510	unsafe { simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]) }
511	}
512
513	/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
514	/// does not generate any instructions, thus it has zero latency.
515	///
516	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
517	#[inline]
518	#[target_feature(enable = "avx512fp16")]
519	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
520	pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
521	unsafe { simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]) }
522	}
523
524	/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
525	/// does not generate any instructions, thus it has zero latency.
526	///
527	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
528	#[inline]
529	#[target_feature(enable = "avx512fp16")]
530	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
531	pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
532	unsafe { simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]) }
533	}
534
535	/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
536	/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
537	/// but most of the time it does not generate any instructions.
538	///
539	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
540	#[inline]
541	#[target_feature(enable = "avx512fp16")]
542	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
543	pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
544	unsafe {
545	simd_shuffle!(
546	a,
547	_mm_undefined_ph(),
548	[`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`]
549	)
550	}
551	}
552
553	/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
554	/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
555	/// but most of the time it does not generate any instructions.
556	///
557	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
558	#[inline]
559	#[target_feature(enable = "avx512fp16")]
560	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
561	pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
562	unsafe {
563	simd_shuffle!(
564	a,
565	_mm_undefined_ph(),
566	[
567	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`,
568	`8`, `8`, `8`, `8`
569	]
570	)
571	}
572	}
573
574	/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
575	/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
576	/// but most of the time it does not generate any instructions.
577	///
578	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
579	#[inline]
580	#[target_feature(enable = "avx512fp16")]
581	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
582	pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
583	unsafe {
584	simd_shuffle!(
585	a,
586	_mm256_undefined_ph(),
587	[
588	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `16`, `16`, `16`, `16`, `16`, `16`,
589	`16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`
590	]
591	)
592	}
593	}
594
595	/// Cast vector of type `__m256h` to type `__m128h`. The upper 8 elements of the result are zeroed.
596	/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
597	/// any instructions.
598	///
599	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
600	#[inline]
601	#[target_feature(enable = "avx512fp16")]
602	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
603	pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
604	unsafe {
605	simd_shuffle!(
606	a,
607	_mm_setzero_ph(),
608	[`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`]
609	)
610	}
611	}
612
613	/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
614	/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
615	/// any instructions.
616	///
617	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
618	#[inline]
619	#[target_feature(enable = "avx512fp16")]
620	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
621	pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
622	unsafe {
623	simd_shuffle!(
624	a,
625	_mm256_setzero_ph(),
626	[
627	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `16`, `16`, `16`, `16`, `16`, `16`,
628	`16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`
629	]
630	)
631	}
632	}
633
634	/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
635	/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
636	/// any instructions.
637	///
638	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
639	#[inline]
640	#[target_feature(enable = "avx512fp16")]
641	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
642	pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
643	unsafe {
644	simd_shuffle!(
645	a,
646	_mm_setzero_ph(),
647	[
648	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`, `8`,
649	`8`, `8`, `8`, `8`
650	]
651	)
652	}
653	}
654
// Emits a `vcmpph` comparison through inline assembly and evaluates to the resulting mask.
//
// Two forms are accepted:
// * `cmp_asm!(mask_type, reg_class, a, b)`: plain comparison of `a` and `b`.
// * `cmp_asm!(mask_type, mask, reg_class, a, b)`: same comparison with `mask` supplied as the
//   mask operand of the instruction.
//
// The comparison predicate is not a macro argument: both arms reference a const named `IMM5`,
// which must be in scope where the macro is expanded. `asm!` is unsafe, so the macro has to be
// expanded inside an `unsafe` block.
macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
    // Unmasked form.
    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            // The instruction only maps its register inputs to the mask output.
            options(pure, nomem, nostack)
        );
        dst
    }};
    // Masked form: `{{ {mask} }}` renders as `{kN}` after the destination register.
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            // The instruction only maps its register inputs to the mask output.
            options(pure, nomem, nostack)
        );
        dst
    }};
}
682
683	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
684	/// operand specified by imm8, and store the results in mask vector k.
685	///
686	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
687	#[inline]
688	#[target_feature(enable = "avx512fp16,avx512vl")]
689	#[rustc_legacy_const_generics(`2`)]
690	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
691	pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
692	unsafe {
693	static_assert_uimm_bits!(IMM5, `5`);
694	cmp_asm!(__mmask8, xmm_reg, a, b)
695	}
696	}
697
698	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
699	/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
700	/// zeroed out when the corresponding mask bit is not set).
701	///
702	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
703	#[inline]
704	#[target_feature(enable = "avx512fp16,avx512vl")]
705	#[rustc_legacy_const_generics(`3`)]
706	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
707	pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
708	unsafe {
709	static_assert_uimm_bits!(IMM5, `5`);
710	cmp_asm!(__mmask8, k1, xmm_reg, a, b)
711	}
712	}
713
714	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
715	/// operand specified by imm8, and store the results in mask vector k.
716	///
717	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
718	#[inline]
719	#[target_feature(enable = "avx512fp16,avx512vl")]
720	#[rustc_legacy_const_generics(`2`)]
721	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
722	pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
723	unsafe {
724	static_assert_uimm_bits!(IMM5, `5`);
725	cmp_asm!(__mmask16, ymm_reg, a, b)
726	}
727	}
728
729	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
730	/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
731	/// zeroed out when the corresponding mask bit is not set).
732	///
733	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
734	#[inline]
735	#[target_feature(enable = "avx512fp16,avx512vl")]
736	#[rustc_legacy_const_generics(`3`)]
737	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
738	pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
739	k1: __mmask16,
740	a: __m256h,
741	b: __m256h,
742	) -> __mmask16 {
743	unsafe {
744	static_assert_uimm_bits!(IMM5, `5`);
745	cmp_asm!(__mmask16, k1, ymm_reg, a, b)
746	}
747	}
748
749	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
750	/// operand specified by imm8, and store the results in mask vector k.
751	///
752	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
753	#[inline]
754	#[target_feature(enable = "avx512fp16")]
755	#[rustc_legacy_const_generics(`2`)]
756	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
757	pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
758	unsafe {
759	static_assert_uimm_bits!(IMM5, `5`);
760	cmp_asm!(__mmask32, zmm_reg, a, b)
761	}
762	}
763
764	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
765	/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
766	/// zeroed out when the corresponding mask bit is not set).
767	///
768	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
769	#[inline]
770	#[target_feature(enable = "avx512fp16")]
771	#[rustc_legacy_const_generics(`3`)]
772	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
773	pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
774	k1: __mmask32,
775	a: __m512h,
776	b: __m512h,
777	) -> __mmask32 {
778	unsafe {
779	static_assert_uimm_bits!(IMM5, `5`);
780	cmp_asm!(__mmask32, k1, zmm_reg, a, b)
781	}
782	}
783
784	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
785	/// operand specified by imm8, and store the results in mask vector k.
786	///
787	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
788	///
789	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
790	#[inline]
791	#[target_feature(enable = "avx512fp16")]
792	#[rustc_legacy_const_generics(`2`, `3`)]
793	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
794	pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
795	a: __m512h,
796	b: __m512h,
797	) -> __mmask32 {
798	unsafe {
799	static_assert_uimm_bits!(IMM5, `5`);
800	static_assert_sae!(SAE);
801	if SAE == _MM_FROUND_NO_EXC {
802	let dst: __mmask32;
803	asm!(
804	"vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
805	k = lateout(kreg) dst,
806	a = in(zmm_reg) a,
807	b = in(zmm_reg) b,
808	imm8 = const IMM5,
809	options(pure, nomem, nostack)
810	);
811	dst
812	} else {
813	cmp_asm!(__mmask32, zmm_reg, a, b)
814	}
815	}
816	}
817
818	/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
819	/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
820	/// zeroed out when the corresponding mask bit is not set).
821	///
822	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
823	///
824	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
825	#[inline]
826	#[target_feature(enable = "avx512fp16")]
827	#[rustc_legacy_const_generics(`3`, `4`)]
828	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
829	pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
830	k1: __mmask32,
831	a: __m512h,
832	b: __m512h,
833	) -> __mmask32 {
834	unsafe {
835	static_assert_uimm_bits!(IMM5, `5`);
836	static_assert_sae!(SAE);
837	if SAE == _MM_FROUND_NO_EXC {
838	let dst: __mmask32;
839	asm!(
840	"vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
841	k = lateout(kreg) dst,
842	k1 = in(kreg) k1,
843	a = in(zmm_reg) a,
844	b = in(zmm_reg) b,
845	imm8 = const IMM5,
846	options(pure, nomem, nostack)
847	);
848	dst
849	} else {
850	cmp_asm!(__mmask32, k1, zmm_reg, a, b)
851	}
852	}
853	}
854
855	/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
856	/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
857	/// passing _MM_FROUND_NO_EXC in the sae parameter.
858	///
859	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
860	#[inline]
861	#[target_feature(enable = "avx512fp16")]
862	#[rustc_legacy_const_generics(`2`, `3`)]
863	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
864	pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
865	static_assert_uimm_bits!(IMM5, `5`);
866	static_assert_sae!(SAE);
867	_mm_mask_cmp_round_sh_mask::<IMM5, SAE>(k1:`0xff`, a, b)
868	}
869
870	/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
871	/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
872	/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
873	///
874	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
875	#[inline]
876	#[target_feature(enable = "avx512fp16")]
877	#[rustc_legacy_const_generics(`3`, `4`)]
878	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
879	pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
880	k1: __mmask8,
881	a: __m128h,
882	b: __m128h,
883	) -> __mmask8 {
884	unsafe {
885	static_assert_uimm_bits!(IMM5, `5`);
886	static_assert_sae!(SAE);
887	vcmpsh(a, b, IMM5, mask:k1, SAE)
888	}
889	}
890
891	/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
892	/// operand specified by imm8, and store the result in mask vector k.
893	///
894	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
895	#[inline]
896	#[target_feature(enable = "avx512fp16")]
897	#[rustc_legacy_const_generics(`2`)]
898	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
899	pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
900	static_assert_uimm_bits!(IMM5, `5`);
901	_mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
902	}
903
904	/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
905	/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
906	///
907	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
908	#[inline]
909	#[target_feature(enable = "avx512fp16")]
910	#[rustc_legacy_const_generics(`3`)]
911	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
912	pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
913	static_assert_uimm_bits!(IMM5, `5`);
914	_mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
915	}
916
917	/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
918	/// operand specified by imm8, and return the boolean result (0 or 1).
919	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
920	///
921	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
922	#[inline]
923	#[target_feature(enable = "avx512fp16")]
924	#[rustc_legacy_const_generics(`2`, `3`)]
925	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
926	pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
927	unsafe {
928	static_assert_uimm_bits!(IMM5, `5`);
929	static_assert_sae!(SAE);
930	vcomish(a, b, IMM5, SAE)
931	}
932	}
933
934	/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
935	/// operand specified by imm8, and return the boolean result (0 or 1).
936	///
937	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
938	#[inline]
939	#[target_feature(enable = "avx512fp16")]
940	#[rustc_legacy_const_generics(`2`)]
941	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
942	pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
943	static_assert_uimm_bits!(IMM5, `5`);
944	_mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
945	}
946
947	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
948	/// the boolean result (0 or 1).
949	///
950	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
951	#[inline]
952	#[target_feature(enable = "avx512fp16")]
953	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
954	pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
955	_mm_comi_sh::<_CMP_EQ_OS>(a, b)
956	}
957
958	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
959	/// and return the boolean result (0 or 1).
960	///
961	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
962	#[inline]
963	#[target_feature(enable = "avx512fp16")]
964	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
965	pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
966	_mm_comi_sh::<_CMP_GE_OS>(a, b)
967	}
968
969	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
970	/// the boolean result (0 or 1).
971	///
972	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
973	#[inline]
974	#[target_feature(enable = "avx512fp16")]
975	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
976	pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
977	_mm_comi_sh::<_CMP_GT_OS>(a, b)
978	}
979
980	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
981	/// return the boolean result (0 or 1).
982	///
983	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
984	#[inline]
985	#[target_feature(enable = "avx512fp16")]
986	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
987	pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
988	_mm_comi_sh::<_CMP_LE_OS>(a, b)
989	}
990
991	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
992	/// the boolean result (0 or 1).
993	///
994	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
995	#[inline]
996	#[target_feature(enable = "avx512fp16")]
997	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
998	pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
999	_mm_comi_sh::<_CMP_LT_OS>(a, b)
1000	}
1001
1002	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
1003	/// the boolean result (0 or 1).
1004	///
1005	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
1006	#[inline]
1007	#[target_feature(enable = "avx512fp16")]
1008	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1009	pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
1010	_mm_comi_sh::<_CMP_NEQ_OS>(a, b)
1011	}
1012
1013	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
1014	/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
1015	///
1016	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
1017	#[inline]
1018	#[target_feature(enable = "avx512fp16")]
1019	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1020	pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
1021	_mm_comi_sh::<_CMP_EQ_OQ>(a, b)
1022	}
1023
1024	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
1025	/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
1026	///
1027	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
1028	#[inline]
1029	#[target_feature(enable = "avx512fp16")]
1030	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1031	pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
1032	_mm_comi_sh::<_CMP_GE_OQ>(a, b)
1033	}
1034
1035	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
1036	/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
1037	///
1038	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
1039	#[inline]
1040	#[target_feature(enable = "avx512fp16")]
1041	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1042	pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
1043	_mm_comi_sh::<_CMP_GT_OQ>(a, b)
1044	}
1045
1046	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
1047	/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
1048	///
1049	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
1050	#[inline]
1051	#[target_feature(enable = "avx512fp16")]
1052	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1053	pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
1054	_mm_comi_sh::<_CMP_LE_OQ>(a, b)
1055	}
1056
1057	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
1058	/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
1059	///
1060	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
1061	#[inline]
1062	#[target_feature(enable = "avx512fp16")]
1063	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1064	pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
1065	_mm_comi_sh::<_CMP_LT_OQ>(a, b)
1066	}
1067
1068	/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
1069	/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
1070	///
1071	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
1072	#[inline]
1073	#[target_feature(enable = "avx512fp16")]
1074	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1075	pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
1076	_mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
1077	}
1078
1079	/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
1080	/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
1081	///
1082	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
1083	#[inline]
1084	#[target_feature(enable = "avx512fp16,avx512vl")]
1085	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1086	pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
1087	*mem_addr.cast()
1088	}
1089
1090	/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
1091	/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
1092	///
1093	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
1094	#[inline]
1095	#[target_feature(enable = "avx512fp16,avx512vl")]
1096	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1097	pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
1098	*mem_addr.cast()
1099	}
1100
1101	/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
1102	/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
1103	///
1104	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
1105	#[inline]
1106	#[target_feature(enable = "avx512fp16")]
1107	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1108	pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
1109	*mem_addr.cast()
1110	}
1111
1112	/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
1113	/// and zero the upper elements
1114	///
1115	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
1116	#[inline]
1117	#[target_feature(enable = "avx512fp16")]
1118	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1119	pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
1120	_mm_set_sh(*mem_addr)
1121	}
1122
1123	/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
1124	/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
1125	///
1126	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
1127	#[inline]
1128	#[target_feature(enable = "avx512fp16")]
1129	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1130	pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
1131	let mut dst: __m128h = src;
1132	asm!(
1133	vpl!("vmovsh {dst}{{{k}}}"),
1134	dst = inout(xmm_reg) dst,
1135	k = in(kreg) k,
1136	p = in(reg) mem_addr,
1137	options(pure, readonly, nostack, preserves_flags)
1138	);
1139	dst
1140	}
1141
1142	/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
1143	/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
1144	///
1145	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
1146	#[inline]
1147	#[target_feature(enable = "avx512fp16")]
1148	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1149	pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
1150	let mut dst: __m128h;
1151	asm!(
1152	vpl!("vmovsh {dst}{{{k}}}{{z}}"),
1153	dst = out(xmm_reg) dst,
1154	k = in(kreg) k,
1155	p = in(reg) mem_addr,
1156	options(pure, readonly, nostack, preserves_flags)
1157	);
1158	dst
1159	}
1160
1161	/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
1162	/// a new vector. The address does not need to be aligned to any particular boundary.
1163	///
1164	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
1165	#[inline]
1166	#[target_feature(enable = "avx512fp16,avx512vl")]
1167	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1168	pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
1169	ptr::read_unaligned(src:mem_addr.cast())
1170	}
1171
1172	/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
1173	/// a new vector. The address does not need to be aligned to any particular boundary.
1174	///
1175	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
1176	#[inline]
1177	#[target_feature(enable = "avx512fp16,avx512vl")]
1178	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1179	pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
1180	ptr::read_unaligned(src:mem_addr.cast())
1181	}
1182
1183	/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
1184	/// a new vector. The address does not need to be aligned to any particular boundary.
1185	///
1186	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
1187	#[inline]
1188	#[target_feature(enable = "avx512fp16")]
1189	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1190	pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
1191	ptr::read_unaligned(src:mem_addr.cast())
1192	}
1193
1194	/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
1195	/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
1196	/// 7 packed elements from a to the upper elements of dst.
1197	///
1198	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
1199	#[inline]
1200	#[target_feature(enable = "avx512fp16")]
1201	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1202	pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1203	unsafe {
1204	let mut mov: f16 = simd_extract!(src, `0`);
1205	if (k & `1`) != `0` {
1206	mov = simd_extract!(b, `0`);
1207	}
1208	simd_insert!(a, `0`, mov)
1209	}
1210	}
1211
1212	/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
1213	/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
1214	/// elements from a to the upper elements of dst.
1215	///
1216	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
1217	#[inline]
1218	#[target_feature(enable = "avx512fp16")]
1219	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1220	pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1221	unsafe {
1222	let mut mov: f16 = `0.`;
1223	if (k & `1`) != `0` {
1224	mov = simd_extract!(b, `0`);
1225	}
1226	simd_insert!(a, `0`, mov)
1227	}
1228	}
1229
1230	/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
1231	/// and copy the upper 7 packed elements from a to the upper elements of dst.
1232	///
1233	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
1234	#[inline]
1235	#[target_feature(enable = "avx512fp16")]
1236	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1237	pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
1238	unsafe {
1239	let mov: f16 = simd_extract!(b, `0`);
1240	simd_insert!(a, `0`, mov)
1241	}
1242	}
1243
1244	/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
1245	/// The address must be aligned to 16 bytes or a general-protection exception may be generated.
1246	///
1247	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
1248	#[inline]
1249	#[target_feature(enable = "avx512fp16,avx512vl")]
1250	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1251	pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
1252	*mem_addr.cast() = a;
1253	}
1254
1255	/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
1256	/// The address must be aligned to 32 bytes or a general-protection exception may be generated.
1257	///
1258	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
1259	#[inline]
1260	#[target_feature(enable = "avx512fp16,avx512vl")]
1261	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1262	pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
1263	*mem_addr.cast() = a;
1264	}
1265
1266	/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
1267	/// The address must be aligned to 64 bytes or a general-protection exception may be generated.
1268	///
1269	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
1270	#[inline]
1271	#[target_feature(enable = "avx512fp16")]
1272	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1273	pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
1274	*mem_addr.cast() = a;
1275	}
1276
1277	/// Store the lower half-precision (16-bit) floating-point element from a into memory.
1278	///
1279	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
1280	#[inline]
1281	#[target_feature(enable = "avx512fp16")]
1282	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1283	pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
1284	*mem_addr = simd_extract!(a, `0`);
1285	}
1286
1287	/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k
1288	///
1289	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
1290	#[inline]
1291	#[target_feature(enable = "avx512fp16")]
1292	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1293	pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
1294	asm!(
1295	vps!("vmovdqu16", "{{{k}}}, {src}"),
1296	p = in(reg) mem_addr,
1297	k = in(kreg) k,
1298	src = in(xmm_reg) a,
1299	options(nostack, preserves_flags)
1300	);
1301	}
1302
1303	/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
1304	/// The address does not need to be aligned to any particular boundary.
1305	///
1306	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
1307	#[inline]
1308	#[target_feature(enable = "avx512fp16,avx512vl")]
1309	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1310	pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
1311	ptr::write_unaligned(dst:mem_addr.cast(), src:a);
1312	}
1313
1314	/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
1315	/// The address does not need to be aligned to any particular boundary.
1316	///
1317	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
1318	#[inline]
1319	#[target_feature(enable = "avx512fp16,avx512vl")]
1320	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1321	pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
1322	ptr::write_unaligned(dst:mem_addr.cast(), src:a);
1323	}
1324
1325	/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
1326	/// The address does not need to be aligned to any particular boundary.
1327	///
1328	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
1329	#[inline]
1330	#[target_feature(enable = "avx512fp16")]
1331	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1332	pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
1333	ptr::write_unaligned(dst:mem_addr.cast(), src:a);
1334	}
1335
1336	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1337	///
1338	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
1339	#[inline]
1340	#[target_feature(enable = "avx512fp16,avx512vl")]
1341	#[cfg_attr(test, assert_instr(vaddph))]
1342	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1343	pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
1344	unsafe { simd_add(x:a, y:b) }
1345	}
1346
1347	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1348	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1349	///
1350	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
1351	#[inline]
1352	#[target_feature(enable = "avx512fp16,avx512vl")]
1353	#[cfg_attr(test, assert_instr(vaddph))]
1354	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1355	pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1356	unsafe {
1357	let r: __m128h = _mm_add_ph(a, b);
1358	simd_select_bitmask(m:k, yes:r, no:src)
1359	}
1360	}
1361
1362	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1363	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1364	///
1365	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
1366	#[inline]
1367	#[target_feature(enable = "avx512fp16,avx512vl")]
1368	#[cfg_attr(test, assert_instr(vaddph))]
1369	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1370	pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1371	unsafe {
1372	let r: __m128h = _mm_add_ph(a, b);
1373	simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1374	}
1375	}
1376
1377	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1378	///
1379	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
1380	#[inline]
1381	#[target_feature(enable = "avx512fp16,avx512vl")]
1382	#[cfg_attr(test, assert_instr(vaddph))]
1383	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1384	pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
1385	unsafe { simd_add(x:a, y:b) }
1386	}
1387
1388	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1389	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1390	///
1391	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
1392	#[inline]
1393	#[target_feature(enable = "avx512fp16,avx512vl")]
1394	#[cfg_attr(test, assert_instr(vaddph))]
1395	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1396	pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1397	unsafe {
1398	let r: __m256h = _mm256_add_ph(a, b);
1399	simd_select_bitmask(m:k, yes:r, no:src)
1400	}
1401	}
1402
1403	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1404	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1405	///
1406	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
1407	#[inline]
1408	#[target_feature(enable = "avx512fp16,avx512vl")]
1409	#[cfg_attr(test, assert_instr(vaddph))]
1410	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1411	pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1412	unsafe {
1413	let r: __m256h = _mm256_add_ph(a, b);
1414	simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
1415	}
1416	}
1417
1418	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1419	///
1420	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
1421	#[inline]
1422	#[target_feature(enable = "avx512fp16")]
1423	#[cfg_attr(test, assert_instr(vaddph))]
1424	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1425	pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
1426	unsafe { simd_add(x:a, y:b) }
1427	}
1428
1429	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1430	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1431	///
1432	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
1433	#[inline]
1434	#[target_feature(enable = "avx512fp16")]
1435	#[cfg_attr(test, assert_instr(vaddph))]
1436	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1437	pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1438	unsafe {
1439	let r: __m512h = _mm512_add_ph(a, b);
1440	simd_select_bitmask(m:k, yes:r, no:src)
1441	}
1442	}
1443
1444	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1445	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1446	///
1447	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
1448	#[inline]
1449	#[target_feature(enable = "avx512fp16")]
1450	#[cfg_attr(test, assert_instr(vaddph))]
1451	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1452	pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1453	unsafe {
1454	let r: __m512h = _mm512_add_ph(a, b);
1455	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1456	}
1457	}
1458
1459	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1460	/// Rounding is done according to the rounding parameter, which can be one of:
1461	///
1462	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1463	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1464	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1465	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1466	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1467	///
1468	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
1469	#[inline]
1470	#[target_feature(enable = "avx512fp16")]
1471	#[cfg_attr(test, assert_instr(vaddph, ROUNDING = `8`))]
1472	#[rustc_legacy_const_generics(`2`)]
1473	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1474	pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1475	unsafe {
1476	static_assert_rounding!(ROUNDING);
1477	vaddph(a, b, ROUNDING)
1478	}
1479	}
1480
1481	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1482	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1483	/// Rounding is done according to the rounding parameter, which can be one of:
1484	///
1485	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1486	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1487	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1488	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1489	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1490	///
1491	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
1492	#[inline]
1493	#[target_feature(enable = "avx512fp16")]
1494	#[cfg_attr(test, assert_instr(vaddph, ROUNDING = `8`))]
1495	#[rustc_legacy_const_generics(`4`)]
1496	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1497	pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
1498	src: __m512h,
1499	k: __mmask32,
1500	a: __m512h,
1501	b: __m512h,
1502	) -> __m512h {
1503	unsafe {
1504	static_assert_rounding!(ROUNDING);
1505	let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
1506	simd_select_bitmask(m:k, yes:r, no:src)
1507	}
1508	}
1509
1510	/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1511	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1512	/// Rounding is done according to the rounding parameter, which can be one of:
1513	///
1514	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1515	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1516	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1517	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1518	///
1519	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
1520	#[inline]
1521	#[target_feature(enable = "avx512fp16")]
1522	#[cfg_attr(test, assert_instr(vaddph, ROUNDING = `8`))]
1523	#[rustc_legacy_const_generics(`3`)]
1524	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1525	pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
1526	k: __mmask32,
1527	a: __m512h,
1528	b: __m512h,
1529	) -> __m512h {
1530	unsafe {
1531	static_assert_rounding!(ROUNDING);
1532	let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
1533	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1534	}
1535	}
1536
1537	/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1538	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1539	/// Rounding is done according to the rounding parameter, which can be one of:
1540	///
1541	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1542	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1543	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1544	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1545	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1546	///
1547	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
1548	#[inline]
1549	#[target_feature(enable = "avx512fp16")]
1550	#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = `8`))]
1551	#[rustc_legacy_const_generics(`2`)]
1552	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1553	pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1554	static_assert_rounding!(ROUNDING);
1555	_mm_mask_add_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
1556	}
1557
1558	/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1559	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1560	/// writemask k (the element is copied from src when mask bit 0 is not set).
1561	/// Rounding is done according to the rounding parameter, which can be one of:
1562	///
1563	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1564	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1565	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1566	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1567	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1568	///
1569	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
1570	#[inline]
1571	#[target_feature(enable = "avx512fp16")]
1572	#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = `8`))]
1573	#[rustc_legacy_const_generics(`4`)]
1574	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1575	pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
1576	src: __m128h,
1577	k: __mmask8,
1578	a: __m128h,
1579	b: __m128h,
1580	) -> __m128h {
1581	unsafe {
1582	static_assert_rounding!(ROUNDING);
1583	vaddsh(a, b, src, k, ROUNDING)
1584	}
1585	}
1586
1587	/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1588	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1589	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1590	/// Rounding is done according to the rounding parameter, which can be one of:
1591	///
1592	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1593	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1594	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1595	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1596	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1597	///
1598	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
1599	#[inline]
1600	#[target_feature(enable = "avx512fp16")]
1601	#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = `8`))]
1602	#[rustc_legacy_const_generics(`3`)]
1603	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1604	pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1605	static_assert_rounding!(ROUNDING);
1606	_mm_mask_add_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
1607	}
1608
1609	/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1610	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1611	///
1612	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
1613	#[inline]
1614	#[target_feature(enable = "avx512fp16")]
1615	#[cfg_attr(test, assert_instr(vaddsh))]
1616	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1617	pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
1618	_mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
1619	}
1620
1621	/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1622	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1623	/// writemask k (the element is copied from src when mask bit 0 is not set).
1624	///
1625	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
1626	#[inline]
1627	#[target_feature(enable = "avx512fp16")]
1628	#[cfg_attr(test, assert_instr(vaddsh))]
1629	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1630	pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1631	_mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
1632	}
1633
1634	/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1635	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1636	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1637	///
1638	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
1639	#[inline]
1640	#[target_feature(enable = "avx512fp16")]
1641	#[cfg_attr(test, assert_instr(vaddsh))]
1642	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1643	pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1644	_mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
1645	}
1646
1647	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1648	///
1649	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
1650	#[inline]
1651	#[target_feature(enable = "avx512fp16,avx512vl")]
1652	#[cfg_attr(test, assert_instr(vsubph))]
1653	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1654	pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
1655	unsafe { simd_sub(lhs:a, rhs:b) }
1656	}
1657
1658	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1659	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1660	///
1661	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
1662	#[inline]
1663	#[target_feature(enable = "avx512fp16,avx512vl")]
1664	#[cfg_attr(test, assert_instr(vsubph))]
1665	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1666	pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1667	unsafe {
1668	let r: __m128h = _mm_sub_ph(a, b);
1669	simd_select_bitmask(m:k, yes:r, no:src)
1670	}
1671	}
1672
1673	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1674	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1675	///
1676	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
1677	#[inline]
1678	#[target_feature(enable = "avx512fp16,avx512vl")]
1679	#[cfg_attr(test, assert_instr(vsubph))]
1680	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1681	pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1682	unsafe {
1683	let r: __m128h = _mm_sub_ph(a, b);
1684	simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1685	}
1686	}
1687
1688	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1689	///
1690	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
1691	#[inline]
1692	#[target_feature(enable = "avx512fp16,avx512vl")]
1693	#[cfg_attr(test, assert_instr(vsubph))]
1694	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1695	pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
1696	unsafe { simd_sub(lhs:a, rhs:b) }
1697	}
1698
1699	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1700	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1701	///
1702	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
1703	#[inline]
1704	#[target_feature(enable = "avx512fp16,avx512vl")]
1705	#[cfg_attr(test, assert_instr(vsubph))]
1706	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1707	pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1708	unsafe {
1709	let r: __m256h = _mm256_sub_ph(a, b);
1710	simd_select_bitmask(m:k, yes:r, no:src)
1711	}
1712	}
1713
1714	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1715	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1716	///
1717	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
1718	#[inline]
1719	#[target_feature(enable = "avx512fp16,avx512vl")]
1720	#[cfg_attr(test, assert_instr(vsubph))]
1721	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1722	pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1723	unsafe {
1724	let r: __m256h = _mm256_sub_ph(a, b);
1725	simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
1726	}
1727	}
1728
1729	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1730	///
1731	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
1732	#[inline]
1733	#[target_feature(enable = "avx512fp16")]
1734	#[cfg_attr(test, assert_instr(vsubph))]
1735	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1736	pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
1737	unsafe { simd_sub(lhs:a, rhs:b) }
1738	}
1739
1740	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1741	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1742	///
1743	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
1744	#[inline]
1745	#[target_feature(enable = "avx512fp16")]
1746	#[cfg_attr(test, assert_instr(vsubph))]
1747	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1748	pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1749	unsafe {
1750	let r: __m512h = _mm512_sub_ph(a, b);
1751	simd_select_bitmask(m:k, yes:r, no:src)
1752	}
1753	}
1754
1755	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1756	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1757	///
1758	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
1759	#[inline]
1760	#[target_feature(enable = "avx512fp16")]
1761	#[cfg_attr(test, assert_instr(vsubph))]
1762	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1763	pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1764	unsafe {
1765	let r: __m512h = _mm512_sub_ph(a, b);
1766	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1767	}
1768	}
1769
1770	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1771	/// Rounding is done according to the rounding parameter, which can be one of:
1772	///
1773	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1774	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1775	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1776	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1777	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1778	///
1779	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
1780	#[inline]
1781	#[target_feature(enable = "avx512fp16")]
1782	#[cfg_attr(test, assert_instr(vsubph, ROUNDING = `8`))]
1783	#[rustc_legacy_const_generics(`2`)]
1784	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1785	pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1786	unsafe {
1787	static_assert_rounding!(ROUNDING);
1788	vsubph(a, b, ROUNDING)
1789	}
1790	}
1791
1792	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1793	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1794	/// Rounding is done according to the rounding parameter, which can be one of:
1795	///
1796	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1797	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1798	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1799	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1800	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1801	///
1802	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
1803	#[inline]
1804	#[target_feature(enable = "avx512fp16")]
1805	#[cfg_attr(test, assert_instr(vsubph, ROUNDING = `8`))]
1806	#[rustc_legacy_const_generics(`4`)]
1807	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1808	pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
1809	src: __m512h,
1810	k: __mmask32,
1811	a: __m512h,
1812	b: __m512h,
1813	) -> __m512h {
1814	unsafe {
1815	static_assert_rounding!(ROUNDING);
1816	let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
1817	simd_select_bitmask(m:k, yes:r, no:src)
1818	}
1819	}
1820
1821	/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1822	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1823	/// Rounding is done according to the rounding parameter, which can be one of:
1824	///
1825	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1826	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1827	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1828	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1829	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1830	///
1831	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
1832	#[inline]
1833	#[target_feature(enable = "avx512fp16")]
1834	#[cfg_attr(test, assert_instr(vsubph, ROUNDING = `8`))]
1835	#[rustc_legacy_const_generics(`3`)]
1836	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1837	pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
1838	k: __mmask32,
1839	a: __m512h,
1840	b: __m512h,
1841	) -> __m512h {
1842	unsafe {
1843	static_assert_rounding!(ROUNDING);
1844	let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
1845	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1846	}
1847	}
1848
1849	/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1850	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1851	/// Rounding is done according to the rounding parameter, which can be one of:
1852	///
1853	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1854	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1855	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1856	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1857	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1858	///
1859	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
1860	#[inline]
1861	#[target_feature(enable = "avx512fp16")]
1862	#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = `8`))]
1863	#[rustc_legacy_const_generics(`2`)]
1864	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1865	pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1866	static_assert_rounding!(ROUNDING);
1867	_mm_mask_sub_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
1868	}
1869
1870	/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1871	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1872	/// writemask k (the element is copied from src when mask bit 0 is not set).
1873	/// Rounding is done according to the rounding parameter, which can be one of:
1874	///
1875	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1876	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1877	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1878	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1879	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1880	///
1881	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
1882	#[inline]
1883	#[target_feature(enable = "avx512fp16")]
1884	#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = `8`))]
1885	#[rustc_legacy_const_generics(`4`)]
1886	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1887	pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
1888	src: __m128h,
1889	k: __mmask8,
1890	a: __m128h,
1891	b: __m128h,
1892	) -> __m128h {
1893	unsafe {
1894	static_assert_rounding!(ROUNDING);
1895	vsubsh(a, b, src, k, ROUNDING)
1896	}
1897	}
1898
1899	/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1900	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1901	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1902	/// Rounding is done according to the rounding parameter, which can be one of:
1903	///
1904	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1905	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1906	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1907	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1908	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1909	///
1910	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
1911	#[inline]
1912	#[target_feature(enable = "avx512fp16")]
1913	#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = `8`))]
1914	#[rustc_legacy_const_generics(`3`)]
1915	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1916	pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1917	static_assert_rounding!(ROUNDING);
1918	_mm_mask_sub_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
1919	}
1920
1921	/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1922	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1923	///
1924	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
1925	#[inline]
1926	#[target_feature(enable = "avx512fp16")]
1927	#[cfg_attr(test, assert_instr(vsubsh))]
1928	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1929	pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
1930	_mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
1931	}
1932
1933	/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1934	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1935	/// writemask k (the element is copied from src when mask bit 0 is not set).
1936	///
1937	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
1938	#[inline]
1939	#[target_feature(enable = "avx512fp16")]
1940	#[cfg_attr(test, assert_instr(vsubsh))]
1941	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1942	pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1943	_mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
1944	}
1945
1946	/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1947	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1948	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1949	///
1950	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
1951	#[inline]
1952	#[target_feature(enable = "avx512fp16")]
1953	#[cfg_attr(test, assert_instr(vsubsh))]
1954	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1955	pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1956	_mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
1957	}
1958
1959	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1960	///
1961	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
1962	#[inline]
1963	#[target_feature(enable = "avx512fp16,avx512vl")]
1964	#[cfg_attr(test, assert_instr(vmulph))]
1965	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1966	pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
1967	unsafe { simd_mul(x:a, y:b) }
1968	}
1969
1970	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1971	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1972	///
1973	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
1974	#[inline]
1975	#[target_feature(enable = "avx512fp16,avx512vl")]
1976	#[cfg_attr(test, assert_instr(vmulph))]
1977	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1978	pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1979	unsafe {
1980	let r: __m128h = _mm_mul_ph(a, b);
1981	simd_select_bitmask(m:k, yes:r, no:src)
1982	}
1983	}
1984
1985	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1986	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1987	///
1988	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
1989	#[inline]
1990	#[target_feature(enable = "avx512fp16,avx512vl")]
1991	#[cfg_attr(test, assert_instr(vmulph))]
1992	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1993	pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1994	unsafe {
1995	let r: __m128h = _mm_mul_ph(a, b);
1996	simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1997	}
1998	}
1999
2000	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2001	///
2002	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
2003	#[inline]
2004	#[target_feature(enable = "avx512fp16,avx512vl")]
2005	#[cfg_attr(test, assert_instr(vmulph))]
2006	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2007	pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
2008	unsafe { simd_mul(x:a, y:b) }
2009	}
2010
2011	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2012	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2013	///
2014	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
2015	#[inline]
2016	#[target_feature(enable = "avx512fp16,avx512vl")]
2017	#[cfg_attr(test, assert_instr(vmulph))]
2018	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2019	pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2020	unsafe {
2021	let r: __m256h = _mm256_mul_ph(a, b);
2022	simd_select_bitmask(m:k, yes:r, no:src)
2023	}
2024	}
2025
2026	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2027	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2028	///
2029	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
2030	#[inline]
2031	#[target_feature(enable = "avx512fp16,avx512vl")]
2032	#[cfg_attr(test, assert_instr(vmulph))]
2033	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2034	pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2035	unsafe {
2036	let r: __m256h = _mm256_mul_ph(a, b);
2037	simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
2038	}
2039	}
2040
2041	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2042	///
2043	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
2044	#[inline]
2045	#[target_feature(enable = "avx512fp16")]
2046	#[cfg_attr(test, assert_instr(vmulph))]
2047	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2048	pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
2049	unsafe { simd_mul(x:a, y:b) }
2050	}
2051
2052	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2053	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2054	///
2055	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
2056	#[inline]
2057	#[target_feature(enable = "avx512fp16")]
2058	#[cfg_attr(test, assert_instr(vmulph))]
2059	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2060	pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2061	unsafe {
2062	let r: __m512h = _mm512_mul_ph(a, b);
2063	simd_select_bitmask(m:k, yes:r, no:src)
2064	}
2065	}
2066
2067	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2068	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2069	///
2070	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
2071	#[inline]
2072	#[target_feature(enable = "avx512fp16")]
2073	#[cfg_attr(test, assert_instr(vmulph))]
2074	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2075	pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2076	unsafe {
2077	let r: __m512h = _mm512_mul_ph(a, b);
2078	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2079	}
2080	}
2081
2082	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2083	/// Rounding is done according to the rounding parameter, which can be one of:
2084	///
2085	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2086	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2087	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2088	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2089	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2090	///
2091	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
2092	#[inline]
2093	#[target_feature(enable = "avx512fp16")]
2094	#[cfg_attr(test, assert_instr(vmulph, ROUNDING = `8`))]
2095	#[rustc_legacy_const_generics(`2`)]
2096	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2097	pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2098	unsafe {
2099	static_assert_rounding!(ROUNDING);
2100	vmulph(a, b, ROUNDING)
2101	}
2102	}
2103
2104	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2105	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2106	/// Rounding is done according to the rounding parameter, which can be one of:
2107	///
2108	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2109	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2110	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2111	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2112	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2113	///
2114	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
2115	#[inline]
2116	#[target_feature(enable = "avx512fp16")]
2117	#[cfg_attr(test, assert_instr(vmulph, ROUNDING = `8`))]
2118	#[rustc_legacy_const_generics(`4`)]
2119	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2120	pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
2121	src: __m512h,
2122	k: __mmask32,
2123	a: __m512h,
2124	b: __m512h,
2125	) -> __m512h {
2126	unsafe {
2127	static_assert_rounding!(ROUNDING);
2128	let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
2129	simd_select_bitmask(m:k, yes:r, no:src)
2130	}
2131	}
2132
2133	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2134	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2135	/// Rounding is done according to the rounding parameter, which can be one of:
2136	///
2137	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2138	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2139	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2140	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2141	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2142	///
2143	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
2144	#[inline]
2145	#[target_feature(enable = "avx512fp16")]
2146	#[cfg_attr(test, assert_instr(vmulph, ROUNDING = `8`))]
2147	#[rustc_legacy_const_generics(`3`)]
2148	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2149	pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
2150	k: __mmask32,
2151	a: __m512h,
2152	b: __m512h,
2153	) -> __m512h {
2154	unsafe {
2155	static_assert_rounding!(ROUNDING);
2156	let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
2157	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2158	}
2159	}
2160
2161	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2162	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2163	/// Rounding is done according to the rounding parameter, which can be one of:
2164	///
2165	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2166	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2167	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2168	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2169	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2170	///
2171	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
2172	#[inline]
2173	#[target_feature(enable = "avx512fp16")]
2174	#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = `8`))]
2175	#[rustc_legacy_const_generics(`2`)]
2176	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2177	pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2178	static_assert_rounding!(ROUNDING);
2179	_mm_mask_mul_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
2180	}
2181
2182	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2183	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2184	/// writemask k (the element is copied from src when mask bit 0 is not set).
2185	/// Rounding is done according to the rounding parameter, which can be one of:
2186	///
2187	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2188	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2189	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2190	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2191	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2192	///
2193	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
2194	#[inline]
2195	#[target_feature(enable = "avx512fp16")]
2196	#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = `8`))]
2197	#[rustc_legacy_const_generics(`4`)]
2198	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2199	pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
2200	src: __m128h,
2201	k: __mmask8,
2202	a: __m128h,
2203	b: __m128h,
2204	) -> __m128h {
2205	unsafe {
2206	static_assert_rounding!(ROUNDING);
2207	vmulsh(a, b, src, k, ROUNDING)
2208	}
2209	}
2210
2211	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2212	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2213	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2214	/// Rounding is done according to the rounding parameter, which can be one of:
2215	///
2216	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2217	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2218	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2219	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2220	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2221	///
2222	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
2223	#[inline]
2224	#[target_feature(enable = "avx512fp16")]
2225	#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = `8`))]
2226	#[rustc_legacy_const_generics(`3`)]
2227	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2228	pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2229	static_assert_rounding!(ROUNDING);
2230	_mm_mask_mul_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
2231	}
2232
2233	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2234	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2235	///
2236	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
2237	#[inline]
2238	#[target_feature(enable = "avx512fp16")]
2239	#[cfg_attr(test, assert_instr(vmulsh))]
2240	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2241	pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
2242	_mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
2243	}
2244
2245	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2246	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2247	/// writemask k (the element is copied from src when mask bit 0 is not set).
2248	///
2249	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
2250	#[inline]
2251	#[target_feature(enable = "avx512fp16")]
2252	#[cfg_attr(test, assert_instr(vmulsh))]
2253	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2254	pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2255	_mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2256	}
2257
2258	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2259	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2260	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2261	///
2262	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
2263	#[inline]
2264	#[target_feature(enable = "avx512fp16")]
2265	#[cfg_attr(test, assert_instr(vmulsh))]
2266	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2267	pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2268	_mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
2269	}
2270
2271	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2272	///
2273	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
2274	#[inline]
2275	#[target_feature(enable = "avx512fp16,avx512vl")]
2276	#[cfg_attr(test, assert_instr(vdivph))]
2277	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2278	pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
2279	unsafe { simd_div(lhs:a, rhs:b) }
2280	}
2281
2282	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2283	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2284	///
2285	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
2286	#[inline]
2287	#[target_feature(enable = "avx512fp16,avx512vl")]
2288	#[cfg_attr(test, assert_instr(vdivph))]
2289	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2290	pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2291	unsafe {
2292	let r: __m128h = _mm_div_ph(a, b);
2293	simd_select_bitmask(m:k, yes:r, no:src)
2294	}
2295	}
2296
2297	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2298	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2299	///
2300	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
2301	#[inline]
2302	#[target_feature(enable = "avx512fp16,avx512vl")]
2303	#[cfg_attr(test, assert_instr(vdivph))]
2304	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2305	pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2306	unsafe {
2307	let r: __m128h = _mm_div_ph(a, b);
2308	simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
2309	}
2310	}
2311
2312	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2313	///
2314	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
2315	#[inline]
2316	#[target_feature(enable = "avx512fp16,avx512vl")]
2317	#[cfg_attr(test, assert_instr(vdivph))]
2318	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2319	pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
2320	unsafe { simd_div(lhs:a, rhs:b) }
2321	}
2322
2323	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2324	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2325	///
2326	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
2327	#[inline]
2328	#[target_feature(enable = "avx512fp16,avx512vl")]
2329	#[cfg_attr(test, assert_instr(vdivph))]
2330	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2331	pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2332	unsafe {
2333	let r: __m256h = _mm256_div_ph(a, b);
2334	simd_select_bitmask(m:k, yes:r, no:src)
2335	}
2336	}
2337
2338	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2339	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2340	///
2341	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
2342	#[inline]
2343	#[target_feature(enable = "avx512fp16,avx512vl")]
2344	#[cfg_attr(test, assert_instr(vdivph))]
2345	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2346	pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2347	unsafe {
2348	let r: __m256h = _mm256_div_ph(a, b);
2349	simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
2350	}
2351	}
2352
2353	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2354	///
2355	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
2356	#[inline]
2357	#[target_feature(enable = "avx512fp16")]
2358	#[cfg_attr(test, assert_instr(vdivph))]
2359	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2360	pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
2361	unsafe { simd_div(lhs:a, rhs:b) }
2362	}
2363
2364	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2365	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2366	///
2367	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
2368	#[inline]
2369	#[target_feature(enable = "avx512fp16")]
2370	#[cfg_attr(test, assert_instr(vdivph))]
2371	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2372	pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2373	unsafe {
2374	let r: __m512h = _mm512_div_ph(a, b);
2375	simd_select_bitmask(m:k, yes:r, no:src)
2376	}
2377	}
2378
2379	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2380	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2381	///
2382	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
2383	#[inline]
2384	#[target_feature(enable = "avx512fp16")]
2385	#[cfg_attr(test, assert_instr(vdivph))]
2386	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2387	pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2388	unsafe {
2389	let r: __m512h = _mm512_div_ph(a, b);
2390	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2391	}
2392	}
2393
2394	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2395	/// Rounding is done according to the rounding parameter, which can be one of:
2396	///
2397	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2398	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2399	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2400	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2401	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2402	///
2403	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
2404	#[inline]
2405	#[target_feature(enable = "avx512fp16")]
2406	#[cfg_attr(test, assert_instr(vdivph, ROUNDING = `8`))]
2407	#[rustc_legacy_const_generics(`2`)]
2408	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2409	pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2410	unsafe {
2411	static_assert_rounding!(ROUNDING);
2412	vdivph(a, b, ROUNDING)
2413	}
2414	}
2415
2416	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2417	/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2418	/// Rounding is done according to the rounding parameter, which can be one of:
2419	///
2420	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2421	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2422	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2423	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2424	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2425	///
2426	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
2427	#[inline]
2428	#[target_feature(enable = "avx512fp16")]
2429	#[cfg_attr(test, assert_instr(vdivph, ROUNDING = `8`))]
2430	#[rustc_legacy_const_generics(`4`)]
2431	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2432	pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
2433	src: __m512h,
2434	k: __mmask32,
2435	a: __m512h,
2436	b: __m512h,
2437	) -> __m512h {
2438	unsafe {
2439	static_assert_rounding!(ROUNDING);
2440	let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
2441	simd_select_bitmask(m:k, yes:r, no:src)
2442	}
2443	}
2444
2445	/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2446	/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2447	/// Rounding is done according to the rounding parameter, which can be one of:
2448	///
2449	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2450	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2451	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2452	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2453	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2454	///
2455	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
2456	#[inline]
2457	#[target_feature(enable = "avx512fp16")]
2458	#[cfg_attr(test, assert_instr(vdivph, ROUNDING = `8`))]
2459	#[rustc_legacy_const_generics(`3`)]
2460	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2461	pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
2462	k: __mmask32,
2463	a: __m512h,
2464	b: __m512h,
2465	) -> __m512h {
2466	unsafe {
2467	static_assert_rounding!(ROUNDING);
2468	let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
2469	simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2470	}
2471	}
2472
2473	/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2474	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2475	/// Rounding is done according to the rounding parameter, which can be one of:
2476	///
2477	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2478	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2479	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2480	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2481	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2482	///
2483	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
2484	#[inline]
2485	#[target_feature(enable = "avx512fp16")]
2486	#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = `8`))]
2487	#[rustc_legacy_const_generics(`2`)]
2488	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2489	pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2490	static_assert_rounding!(ROUNDING);
2491	_mm_mask_div_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
2492	}
2493
2494	/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2495	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2496	/// writemask k (the element is copied from src when mask bit 0 is not set).
2497	/// Rounding is done according to the rounding parameter, which can be one of:
2498	///
2499	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2500	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2501	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2502	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2503	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2504	///
2505	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
2506	#[inline]
2507	#[target_feature(enable = "avx512fp16")]
2508	#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = `8`))]
2509	#[rustc_legacy_const_generics(`4`)]
2510	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2511	pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
2512	src: __m128h,
2513	k: __mmask8,
2514	a: __m128h,
2515	b: __m128h,
2516	) -> __m128h {
2517	unsafe {
2518	static_assert_rounding!(ROUNDING);
2519	vdivsh(a, b, src, k, ROUNDING)
2520	}
2521	}
2522
2523	/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2524	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2525	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2526	/// Rounding is done according to the rounding parameter, which can be one of:
2527	///
2528	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2529	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2530	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2531	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2532	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2533	///
2534	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
2535	#[inline]
2536	#[target_feature(enable = "avx512fp16")]
2537	#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = `8`))]
2538	#[rustc_legacy_const_generics(`3`)]
2539	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2540	pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2541	static_assert_rounding!(ROUNDING);
2542	_mm_mask_div_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
2543	}
2544
2545	/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2546	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2547	///
2548	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
2549	#[inline]
2550	#[target_feature(enable = "avx512fp16")]
2551	#[cfg_attr(test, assert_instr(vdivsh))]
2552	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2553	pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
2554	_mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
2555	}
2556
2557	/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2558	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2559	/// writemask k (the element is copied from src when mask bit 0 is not set).
2560	///
2561	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
2562	#[inline]
2563	#[target_feature(enable = "avx512fp16")]
2564	#[cfg_attr(test, assert_instr(vdivsh))]
2565	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2566	pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2567	_mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2568	}
2569
2570	/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2571	/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2572	/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2573	///
2574	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
2575	#[inline]
2576	#[target_feature(enable = "avx512fp16")]
2577	#[cfg_attr(test, assert_instr(vdivsh))]
2578	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2579	pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2580	_mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
2581	}
2582
2583	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2584	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2585	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2586	///
2587	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
2588	#[inline]
2589	#[target_feature(enable = "avx512fp16,avx512vl")]
2590	#[cfg_attr(test, assert_instr(vfmulcph))]
2591	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2592	pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
2593	_mm_mask_mul_pch(src:_mm_undefined_ph(), k:`0xff`, a, b)
2594	}
2595
2596	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2597	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2598	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2599	///
2600	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
2601	#[inline]
2602	#[target_feature(enable = "avx512fp16,avx512vl")]
2603	#[cfg_attr(test, assert_instr(vfmulcph))]
2604	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2605	pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2606	unsafe { transmute(src:vfmulcph_128(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
2607	}
2608
2609	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2610	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2611	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2612	///
2613	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
2614	#[inline]
2615	#[target_feature(enable = "avx512fp16,avx512vl")]
2616	#[cfg_attr(test, assert_instr(vfmulcph))]
2617	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2618	pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2619	_mm_mask_mul_pch(src:_mm_setzero_ph(), k, a, b)
2620	}
2621
2622	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2623	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2624	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2625	///
2626	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
2627	#[inline]
2628	#[target_feature(enable = "avx512fp16,avx512vl")]
2629	#[cfg_attr(test, assert_instr(vfmulcph))]
2630	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2631	pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
2632	_mm256_mask_mul_pch(src:_mm256_undefined_ph(), k:`0xff`, a, b)
2633	}
2634
2635	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2636	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2637	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2638	///
2639	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
2640	#[inline]
2641	#[target_feature(enable = "avx512fp16,avx512vl")]
2642	#[cfg_attr(test, assert_instr(vfmulcph))]
2643	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2644	pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2645	unsafe { transmute(src:vfmulcph_256(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
2646	}
2647
2648	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2649	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2650	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2651	///
2652	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
2653	#[inline]
2654	#[target_feature(enable = "avx512fp16,avx512vl")]
2655	#[cfg_attr(test, assert_instr(vfmulcph))]
2656	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2657	pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2658	_mm256_mask_mul_pch(src:_mm256_setzero_ph(), k, a, b)
2659	}
2660
2661	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2662	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2663	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2664	///
2665	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
2666	#[inline]
2667	#[target_feature(enable = "avx512fp16")]
2668	#[cfg_attr(test, assert_instr(vfmulcph))]
2669	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2670	pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
2671	_mm512_mask_mul_pch(src:_mm512_undefined_ph(), k:`0xffff`, a, b)
2672	}
2673
2674	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2675	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2676	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2677	///
2678	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
2679	#[inline]
2680	#[target_feature(enable = "avx512fp16")]
2681	#[cfg_attr(test, assert_instr(vfmulcph))]
2682	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2683	pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
2684	_mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2685	}
2686
2687	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2688	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2689	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2690	///
2691	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
2692	#[inline]
2693	#[target_feature(enable = "avx512fp16")]
2694	#[cfg_attr(test, assert_instr(vfmulcph))]
2695	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2696	pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
2697	_mm512_mask_mul_pch(src:_mm512_setzero_ph(), k, a, b)
2698	}
2699
2700	/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is
2701	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2702	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2703	///
2704	/// Rounding is done according to the rounding parameter, which can be one of:
2705	///
2706	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2707	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2708	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2709	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2710	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2711	///
2712	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
2713	#[inline]
2714	#[target_feature(enable = "avx512fp16")]
2715	#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = `8`))]
2716	#[rustc_legacy_const_generics(`2`)]
2717	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2718	pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2719	static_assert_rounding!(ROUNDING);
2720	_mm512_mask_mul_round_pch::<ROUNDING>(src:_mm512_undefined_ph(), k:`0xffff`, a, b)
2721	}
2722
2723	/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
2724	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2725	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2726	///
2727	/// Rounding is done according to the rounding parameter, which can be one of:
2728	///
2729	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2730	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2731	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2732	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2733	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2734	///
2735	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
2736	#[inline]
2737	#[target_feature(enable = "avx512fp16")]
2738	#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = `8`))]
2739	#[rustc_legacy_const_generics(`4`)]
2740	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2741	pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
2742	src: __m512h,
2743	k: __mmask16,
2744	a: __m512h,
2745	b: __m512h,
2746	) -> __m512h {
2747	unsafe {
2748	static_assert_rounding!(ROUNDING);
2749	transmute(src:vfmulcph_512(
2750	a:transmute(a),
2751	b:transmute(b),
2752	src:transmute(src),
2753	k,
2754	ROUNDING,
2755	))
2756	}
2757	}
2758
2759	/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2760	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2761	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2762	///
2763	/// Rounding is done according to the rounding parameter, which can be one of:
2764	///
2765	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2766	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2767	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2768	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2769	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2770	///
2771	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
2772	#[inline]
2773	#[target_feature(enable = "avx512fp16")]
2774	#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = `8`))]
2775	#[rustc_legacy_const_generics(`3`)]
2776	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2777	pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
2778	k: __mmask16,
2779	a: __m512h,
2780	b: __m512h,
2781	) -> __m512h {
2782	static_assert_rounding!(ROUNDING);
2783	_mm512_mask_mul_round_pch::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
2784	}
2785
2786	/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2787	/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2788	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2789	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2790	///
2791	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
2792	#[inline]
2793	#[target_feature(enable = "avx512fp16")]
2794	#[cfg_attr(test, assert_instr(vfmulcsh))]
2795	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2796	pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
2797	_mm_mask_mul_sch(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
2798	}
2799
2800	/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2801	/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2802	/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent
2803	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2804	///
2805	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
2806	#[inline]
2807	#[target_feature(enable = "avx512fp16")]
2808	#[cfg_attr(test, assert_instr(vfmulcsh))]
2809	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2810	pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2811	_mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2812	}
2813
2814	/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2815	/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2816	/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2817	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2818	///
2819	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
2820	#[inline]
2821	#[target_feature(enable = "avx512fp16")]
2822	#[cfg_attr(test, assert_instr(vfmulcsh))]
2823	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2824	pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2825	_mm_mask_mul_sch(src:f16x8::ZERO.as_m128h(), k, a, b)
2826	}
2827
2828	/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2829	/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2830	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2831	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2832	///
2833	/// Rounding is done according to the rounding parameter, which can be one of:
2834	///
2835	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2836	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2837	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2838	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2839	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2840	///
2841	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
2842	#[inline]
2843	#[target_feature(enable = "avx512fp16")]
2844	#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = `8`))]
2845	#[rustc_legacy_const_generics(`2`)]
2846	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2847	pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2848	static_assert_rounding!(ROUNDING);
2849	_mm_mask_mul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
2850	}
2851
2852	/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2853	/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2854	/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2855	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2856	///
2857	/// Rounding is done according to the rounding parameter, which can be one of:
2858	///
2859	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2860	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2861	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2862	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2863	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2864	///
2865	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
2866	#[inline]
2867	#[target_feature(enable = "avx512fp16")]
2868	#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = `8`))]
2869	#[rustc_legacy_const_generics(`4`)]
2870	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2871	pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
2872	src: __m128h,
2873	k: __mmask8,
2874	a: __m128h,
2875	b: __m128h,
2876	) -> __m128h {
2877	unsafe {
2878	static_assert_rounding!(ROUNDING);
2879	transmute(src:vfmulcsh(
2880	a:transmute(a),
2881	b:transmute(b),
2882	src:transmute(src),
2883	k,
2884	ROUNDING,
2885	))
2886	}
2887	}
2888
2889	/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2890	/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2891	/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2892	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2893	///
2894	/// Rounding is done according to the rounding parameter, which can be one of:
2895	///
2896	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2897	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2898	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2899	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2900	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2901	///
2902	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
2903	#[inline]
2904	#[target_feature(enable = "avx512fp16")]
2905	#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = `8`))]
2906	#[rustc_legacy_const_generics(`3`)]
2907	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2908	pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
2909	k: __mmask8,
2910	a: __m128h,
2911	b: __m128h,
2912	) -> __m128h {
2913	static_assert_rounding!(ROUNDING);
2914	_mm_mask_mul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
2915	}
2916
2917	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2918	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2919	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2920	///
2921	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
2922	#[inline]
2923	#[target_feature(enable = "avx512fp16,avx512vl")]
2924	#[cfg_attr(test, assert_instr(vfmulcph))]
2925	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2926	pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
2927	_mm_mul_pch(a, b)
2928	}
2929
2930	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2931	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2932	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2933	///
2934	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
2935	#[inline]
2936	#[target_feature(enable = "avx512fp16,avx512vl")]
2937	#[cfg_attr(test, assert_instr(vfmulcph))]
2938	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2939	pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2940	_mm_mask_mul_pch(src, k, a, b)
2941	}
2942
2943	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2944	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2945	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2946	///
2947	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
2948	#[inline]
2949	#[target_feature(enable = "avx512fp16,avx512vl")]
2950	#[cfg_attr(test, assert_instr(vfmulcph))]
2951	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2952	pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2953	_mm_maskz_mul_pch(k, a, b)
2954	}
2955
2956	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2957	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2958	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2959	///
2960	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
2961	#[inline]
2962	#[target_feature(enable = "avx512fp16,avx512vl")]
2963	#[cfg_attr(test, assert_instr(vfmulcph))]
2964	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2965	pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
2966	_mm256_mul_pch(a, b)
2967	}
2968
2969	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2970	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2971	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2972	///
2973	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
2974	#[inline]
2975	#[target_feature(enable = "avx512fp16,avx512vl")]
2976	#[cfg_attr(test, assert_instr(vfmulcph))]
2977	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2978	pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2979	_mm256_mask_mul_pch(src, k, a, b)
2980	}
2981
2982	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2983	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2984	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2985	///
2986	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
2987	#[inline]
2988	#[target_feature(enable = "avx512fp16,avx512vl")]
2989	#[cfg_attr(test, assert_instr(vfmulcph))]
2990	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2991	pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2992	_mm256_maskz_mul_pch(k, a, b)
2993	}
2994
2995	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
2996	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
2997	///
2998	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
2999	#[inline]
3000	#[target_feature(enable = "avx512fp16")]
3001	#[cfg_attr(test, assert_instr(vfmulcph))]
3002	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3003	pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
3004	_mm512_mul_pch(a, b)
3005	}
3006
3007	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3008	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3009	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3010	///
3011	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
3012	#[inline]
3013	#[target_feature(enable = "avx512fp16")]
3014	#[cfg_attr(test, assert_instr(vfmulcph))]
3015	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3016	pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3017	_mm512_mask_mul_pch(src, k, a, b)
3018	}
3019
3020	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3021	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3022	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3023	///
3024	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
3025	#[inline]
3026	#[target_feature(enable = "avx512fp16")]
3027	#[cfg_attr(test, assert_instr(vfmulcph))]
3028	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3029	pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3030	_mm512_maskz_mul_pch(k, a, b)
3031	}
3032
3033	/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3034	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3035	/// Rounding is done according to the rounding parameter, which can be one of:
3036	///
3037	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3038	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3039	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3040	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3041	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3042	///
3043	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
3044	#[inline]
3045	#[target_feature(enable = "avx512fp16")]
3046	#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = `8`))]
3047	#[rustc_legacy_const_generics(`2`)]
3048	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3049	pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3050	static_assert_rounding!(ROUNDING);
3051	_mm512_mul_round_pch::<ROUNDING>(a, b)
3052	}
3053
3054	/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3055	/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3056	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3057	/// Rounding is done according to the rounding parameter, which can be one of:
3058	///
3059	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3060	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3061	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3062	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3063	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3064	///
3065	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
3066	#[inline]
3067	#[target_feature(enable = "avx512fp16")]
3068	#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = `8`))]
3069	#[rustc_legacy_const_generics(`4`)]
3070	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3071	pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
3072	src: __m512h,
3073	k: __mmask16,
3074	a: __m512h,
3075	b: __m512h,
3076	) -> __m512h {
3077	static_assert_rounding!(ROUNDING);
3078	_mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b)
3079	}
3080
3081	/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3082	/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3083	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3084	/// Rounding is done according to the rounding parameter, which can be one of:
3085	///
3086	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3087	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3088	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3089	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3090	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3091	///
3092	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
3093	#[inline]
3094	#[target_feature(enable = "avx512fp16")]
3095	#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = `8`))]
3096	#[rustc_legacy_const_generics(`3`)]
3097	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3098	pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
3099	k: __mmask16,
3100	a: __m512h,
3101	b: __m512h,
3102	) -> __m512h {
3103	static_assert_rounding!(ROUNDING);
3104	_mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b)
3105	}
3106
3107	/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is
3108	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3109	/// number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3110	///
3111	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
3112	#[inline]
3113	#[target_feature(enable = "avx512fp16")]
3114	#[cfg_attr(test, assert_instr(vfmulcsh))]
3115	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3116	pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
3117	_mm_mul_sch(a, b)
3118	}
3119
3120	/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3121	/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3122	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3123	///
3124	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
3125	#[inline]
3126	#[target_feature(enable = "avx512fp16")]
3127	#[cfg_attr(test, assert_instr(vfmulcsh))]
3128	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3129	pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3130	_mm_mask_mul_sch(src, k, a, b)
3131	}
3132
3133	/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3134	/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3135	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3136	///
3137	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
3138	#[inline]
3139	#[target_feature(enable = "avx512fp16")]
3140	#[cfg_attr(test, assert_instr(vfmulcsh))]
3141	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3142	pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3143	_mm_maskz_mul_sch(k, a, b)
3144	}
3145
3146	/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed
3147	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3148	///
3149	/// Rounding is done according to the rounding parameter, which can be one of:
3150	///
3151	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3152	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3153	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3154	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3155	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3156	///
3157	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
3158	#[inline]
3159	#[target_feature(enable = "avx512fp16")]
3160	#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = `8`))]
3161	#[rustc_legacy_const_generics(`2`)]
3162	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3163	pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3164	static_assert_rounding!(ROUNDING);
3165	_mm_mul_round_sch::<ROUNDING>(a, b)
3166	}
3167
3168	/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3169	/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3170	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3171	///
3172	/// Rounding is done according to the rounding parameter, which can be one of:
3173	///
3174	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3175	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3176	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3177	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3178	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3179	///
3180	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
3181	#[inline]
3182	#[target_feature(enable = "avx512fp16")]
3183	#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = `8`))]
3184	#[rustc_legacy_const_generics(`4`)]
3185	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3186	pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
3187	src: __m128h,
3188	k: __mmask8,
3189	a: __m128h,
3190	b: __m128h,
3191	) -> __m128h {
3192	static_assert_rounding!(ROUNDING);
3193	_mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b)
3194	}
3195
3196	/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3197	/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3198	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
3199	///
3200	/// Rounding is done according to the rounding parameter, which can be one of:
3201	///
3202	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3203	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3204	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3205	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3206	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3207	///
3208	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
3209	#[inline]
3210	#[target_feature(enable = "avx512fp16")]
3211	#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = `8`))]
3212	#[rustc_legacy_const_generics(`3`)]
3213	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3214	pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
3215	k: __mmask8,
3216	a: __m128h,
3217	b: __m128h,
3218	) -> __m128h {
3219	static_assert_rounding!(ROUNDING);
3220	_mm_maskz_mul_round_sch::<ROUNDING>(k, a, b)
3221	}
3222
3223	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3224	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3225	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3226	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3227	///
3228	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
3229	#[inline]
3230	#[target_feature(enable = "avx512fp16,avx512vl")]
3231	#[cfg_attr(test, assert_instr(vfcmulcph))]
3232	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3233	pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
3234	_mm_mask_cmul_pch(src:_mm_undefined_ph(), k:`0xff`, a, b)
3235	}
3236
3237	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3238	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3239	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3240	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3241	///
3242	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
3243	#[inline]
3244	#[target_feature(enable = "avx512fp16,avx512vl")]
3245	#[cfg_attr(test, assert_instr(vfcmulcph))]
3246	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3247	pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3248	unsafe { transmute(src:vfcmulcph_128(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
3249	}
3250
3251	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3252	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3253	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3254	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3255	///
3256	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
3257	#[inline]
3258	#[target_feature(enable = "avx512fp16,avx512vl")]
3259	#[cfg_attr(test, assert_instr(vfcmulcph))]
3260	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3261	pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3262	_mm_mask_cmul_pch(src:_mm_setzero_ph(), k, a, b)
3263	}
3264
3265	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3266	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3267	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3268	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3269	///
3270	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
3271	#[inline]
3272	#[target_feature(enable = "avx512fp16,avx512vl")]
3273	#[cfg_attr(test, assert_instr(vfcmulcph))]
3274	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3275	pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
3276	_mm256_mask_cmul_pch(src:_mm256_undefined_ph(), k:`0xff`, a, b)
3277	}
3278
3279	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3280	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3281	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3282	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3283	///
3284	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
3285	#[inline]
3286	#[target_feature(enable = "avx512fp16,avx512vl")]
3287	#[cfg_attr(test, assert_instr(vfcmulcph))]
3288	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3289	pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3290	unsafe { transmute(src:vfcmulcph_256(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
3291	}
3292
3293	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3294	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3295	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3296	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3297	///
3298	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
3299	#[inline]
3300	#[target_feature(enable = "avx512fp16,avx512vl")]
3301	#[cfg_attr(test, assert_instr(vfcmulcph))]
3302	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3303	pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3304	_mm256_mask_cmul_pch(src:_mm256_setzero_ph(), k, a, b)
3305	}
3306
3307	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3308	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3309	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3310	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3311	///
3312	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
3313	#[inline]
3314	#[target_feature(enable = "avx512fp16")]
3315	#[cfg_attr(test, assert_instr(vfcmulcph))]
3316	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3317	pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
3318	_mm512_mask_cmul_pch(src:_mm512_undefined_ph(), k:`0xffff`, a, b)
3319	}
3320
3321	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3322	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3323	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3324	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3325	///
3326	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
3327	#[inline]
3328	#[target_feature(enable = "avx512fp16")]
3329	#[cfg_attr(test, assert_instr(vfcmulcph))]
3330	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3331	pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3332	_mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3333	}
3334
3335	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3336	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3337	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3338	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3339	///
3340	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
3341	#[inline]
3342	#[target_feature(enable = "avx512fp16")]
3343	#[cfg_attr(test, assert_instr(vfcmulcph))]
3344	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3345	pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3346	_mm512_mask_cmul_pch(src:_mm512_setzero_ph(), k, a, b)
3347	}
3348
3349	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3350	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3351	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3352	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3353	///
3354	/// Rounding is done according to the rounding parameter, which can be one of:
3355	///
3356	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3357	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3358	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3359	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3360	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3361	///
3362	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
3363	#[inline]
3364	#[target_feature(enable = "avx512fp16")]
3365	#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = `8`))]
3366	#[rustc_legacy_const_generics(`2`)]
3367	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3368	pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3369	static_assert_rounding!(ROUNDING);
3370	_mm512_mask_cmul_round_pch::<ROUNDING>(src:_mm512_undefined_ph(), k:`0xffff`, a, b)
3371	}
3372
3373	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3374	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3375	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3376	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3377	///
3378	/// Rounding is done according to the rounding parameter, which can be one of:
3379	///
3380	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3381	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3382	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3383	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3384	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3385	///
3386	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
3387	#[inline]
3388	#[target_feature(enable = "avx512fp16")]
3389	#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = `8`))]
3390	#[rustc_legacy_const_generics(`4`)]
3391	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3392	pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
3393	src: __m512h,
3394	k: __mmask16,
3395	a: __m512h,
3396	b: __m512h,
3397	) -> __m512h {
3398	unsafe {
3399	static_assert_rounding!(ROUNDING);
3400	transmute(src:vfcmulcph_512(
3401	a:transmute(a),
3402	b:transmute(b),
3403	src:transmute(src),
3404	k,
3405	ROUNDING,
3406	))
3407	}
3408	}
3409
3410	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3411	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3412	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3413	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3414	///
3415	/// Rounding is done according to the rounding parameter, which can be one of:
3416	///
3417	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3418	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3419	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3420	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3421	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3422	///
3423	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
3424	#[inline]
3425	#[target_feature(enable = "avx512fp16")]
3426	#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = `8`))]
3427	#[rustc_legacy_const_generics(`3`)]
3428	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3429	pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
3430	k: __mmask16,
3431	a: __m512h,
3432	b: __m512h,
3433	) -> __m512h {
3434	static_assert_rounding!(ROUNDING);
3435	_mm512_mask_cmul_round_pch::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
3436	}
3437
3438	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3439	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3440	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3441	///
3442	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
3443	#[inline]
3444	#[target_feature(enable = "avx512fp16")]
3445	#[cfg_attr(test, assert_instr(vfcmulcsh))]
3446	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3447	pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
3448	_mm_mask_cmul_sch(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
3449	}
3450
3451	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3452	/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3453	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3454	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3455	///
3456	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
3457	#[inline]
3458	#[target_feature(enable = "avx512fp16")]
3459	#[cfg_attr(test, assert_instr(vfcmulcsh))]
3460	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3461	pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3462	_mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3463	}
3464
3465	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3466	/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3467	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3468	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3469	///
3470	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
3471	#[inline]
3472	#[target_feature(enable = "avx512fp16")]
3473	#[cfg_attr(test, assert_instr(vfcmulcsh))]
3474	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3475	pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3476	_mm_mask_cmul_sch(src:f16x8::ZERO.as_m128h(), k, a, b)
3477	}
3478
3479	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3480	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3481	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3482	///
3483	/// Rounding is done according to the rounding parameter, which can be one of:
3484	///
3485	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3486	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3487	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3488	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3489	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3490	///
3491	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
3492	#[inline]
3493	#[target_feature(enable = "avx512fp16")]
3494	#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = `8`))]
3495	#[rustc_legacy_const_generics(`2`)]
3496	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3497	pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3498	static_assert_rounding!(ROUNDING);
3499	_mm_mask_cmul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
3500	}
3501
3502	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3503	/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3504	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3505	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3506	///
3507	/// Rounding is done according to the rounding parameter, which can be one of:
3508	///
3509	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3510	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3511	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3512	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3513	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3514	///
3515	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
3516	#[inline]
3517	#[target_feature(enable = "avx512fp16")]
3518	#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = `8`))]
3519	#[rustc_legacy_const_generics(`4`)]
3520	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3521	pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
3522	src: __m128h,
3523	k: __mmask8,
3524	a: __m128h,
3525	b: __m128h,
3526	) -> __m128h {
3527	unsafe {
3528	static_assert_rounding!(ROUNDING);
3529	transmute(src:vfcmulcsh(
3530	a:transmute(a),
3531	b:transmute(b),
3532	src:transmute(src),
3533	k,
3534	ROUNDING,
3535	))
3536	}
3537	}
3538
3539	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3540	/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3541	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3542	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3543	///
3544	/// Rounding is done according to the rounding parameter, which can be one of:
3545	///
3546	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3547	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3548	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3549	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3550	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3551	///
3552	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
3553	#[inline]
3554	#[target_feature(enable = "avx512fp16")]
3555	#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = `8`))]
3556	#[rustc_legacy_const_generics(`3`)]
3557	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3558	pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
3559	k: __mmask8,
3560	a: __m128h,
3561	b: __m128h,
3562	) -> __m128h {
3563	static_assert_rounding!(ROUNDING);
3564	_mm_mask_cmul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
3565	}
3566
3567	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3568	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3569	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3570	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3571	///
3572	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
3573	#[inline]
3574	#[target_feature(enable = "avx512fp16,avx512vl")]
3575	#[cfg_attr(test, assert_instr(vfcmulcph))]
3576	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3577	pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
3578	_mm_cmul_pch(a, b)
3579	}
3580
3581	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3582	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3583	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3584	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3585	///
3586	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
3587	#[inline]
3588	#[target_feature(enable = "avx512fp16,avx512vl")]
3589	#[cfg_attr(test, assert_instr(vfcmulcph))]
3590	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3591	pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3592	_mm_mask_cmul_pch(src, k, a, b)
3593	}
3594
3595	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3596	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3597	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3598	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3599	///
3600	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
3601	#[inline]
3602	#[target_feature(enable = "avx512fp16,avx512vl")]
3603	#[cfg_attr(test, assert_instr(vfcmulcph))]
3604	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3605	pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3606	_mm_maskz_cmul_pch(k, a, b)
3607	}
3608
3609	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3610	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3611	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3612	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3613	///
3614	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
3615	#[inline]
3616	#[target_feature(enable = "avx512fp16,avx512vl")]
3617	#[cfg_attr(test, assert_instr(vfcmulcph))]
3618	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3619	pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
3620	_mm256_cmul_pch(a, b)
3621	}
3622
3623	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3624	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3625	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3626	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3627	///
3628	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
3629	#[inline]
3630	#[target_feature(enable = "avx512fp16,avx512vl")]
3631	#[cfg_attr(test, assert_instr(vfcmulcph))]
3632	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3633	pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3634	_mm256_mask_cmul_pch(src, k, a, b)
3635	}
3636
3637	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3638	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3639	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3640	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3641	///
3642	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
3643	#[inline]
3644	#[target_feature(enable = "avx512fp16,avx512vl")]
3645	#[cfg_attr(test, assert_instr(vfcmulcph))]
3646	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3647	pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3648	_mm256_maskz_cmul_pch(k, a, b)
3649	}
3650
3651	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3652	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3653	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3654	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3655	///
3656	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
3657	#[inline]
3658	#[target_feature(enable = "avx512fp16")]
3659	#[cfg_attr(test, assert_instr(vfcmulcph))]
3660	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3661	pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
3662	_mm512_cmul_pch(a, b)
3663	}
3664
3665	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3666	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3667	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3668	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3669	///
3670	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
3671	#[inline]
3672	#[target_feature(enable = "avx512fp16")]
3673	#[cfg_attr(test, assert_instr(vfcmulcph))]
3674	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3675	pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3676	_mm512_mask_cmul_pch(src, k, a, b)
3677	}
3678
3679	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3680	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3681	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3682	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3683	///
3684	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
3685	#[inline]
3686	#[target_feature(enable = "avx512fp16")]
3687	#[cfg_attr(test, assert_instr(vfcmulcph))]
3688	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3689	pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3690	_mm512_maskz_cmul_pch(k, a, b)
3691	}
3692
3693	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3694	/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3695	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3696	///
3697	/// Rounding is done according to the rounding parameter, which can be one of:
3698	///
3699	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3700	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3701	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3702	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3703	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3704	///
3705	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
3706	#[inline]
3707	#[target_feature(enable = "avx512fp16")]
3708	#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = `8`))]
3709	#[rustc_legacy_const_generics(`2`)]
3710	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3711	pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3712	static_assert_rounding!(ROUNDING);
3713	_mm512_cmul_round_pch::<ROUNDING>(a, b)
3714	}
3715
3716	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3717	/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3718	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3719	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3720	///
3721	/// Rounding is done according to the rounding parameter, which can be one of:
3722	///
3723	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3724	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3725	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3726	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3727	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3728	///
3729	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
3730	#[inline]
3731	#[target_feature(enable = "avx512fp16")]
3732	#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = `8`))]
3733	#[rustc_legacy_const_generics(`4`)]
3734	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3735	pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
3736	src: __m512h,
3737	k: __mmask16,
3738	a: __m512h,
3739	b: __m512h,
3740	) -> __m512h {
3741	static_assert_rounding!(ROUNDING);
3742	_mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b)
3743	}
3744
3745	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3746	/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3747	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3748	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3749	///
3750	/// Rounding is done according to the rounding parameter, which can be one of:
3751	///
3752	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3753	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3754	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3755	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3756	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3757	///
3758	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
3759	#[inline]
3760	#[target_feature(enable = "avx512fp16")]
3761	#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = `8`))]
3762	#[rustc_legacy_const_generics(`3`)]
3763	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3764	pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
3765	k: __mmask16,
3766	a: __m512h,
3767	b: __m512h,
3768	) -> __m512h {
3769	static_assert_rounding!(ROUNDING);
3770	_mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b)
3771	}
3772
3773	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3774	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3775	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3776	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3777	///
3778	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
3779	#[inline]
3780	#[target_feature(enable = "avx512fp16")]
3781	#[cfg_attr(test, assert_instr(vfcmulcsh))]
3782	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3783	pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
3784	_mm_cmul_sch(a, b)
3785	}
3786
3787	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3788	/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3789	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3790	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3791	///
3792	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
3793	#[inline]
3794	#[target_feature(enable = "avx512fp16")]
3795	#[cfg_attr(test, assert_instr(vfcmulcsh))]
3796	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3797	pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3798	_mm_mask_cmul_sch(src, k, a, b)
3799	}
3800
3801	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3802	/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3803	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3804	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3805	///
3806	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
3807	#[inline]
3808	#[target_feature(enable = "avx512fp16")]
3809	#[cfg_attr(test, assert_instr(vfcmulcsh))]
3810	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3811	pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3812	_mm_maskz_cmul_sch(k, a, b)
3813	}
3814
3815	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3816	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3817	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3818	///
3819	/// Rounding is done according to the rounding parameter, which can be one of:
3820	///
3821	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3822	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3823	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3824	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3825	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3826	///
3827	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
3828	#[inline]
3829	#[target_feature(enable = "avx512fp16")]
3830	#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = `8`))]
3831	#[rustc_legacy_const_generics(`2`)]
3832	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3833	pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3834	static_assert_rounding!(ROUNDING);
3835	_mm_cmul_round_sch::<ROUNDING>(a, b)
3836	}
3837
3838	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3839	/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3840	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3841	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3842	///
3843	/// Rounding is done according to the rounding parameter, which can be one of:
3844	///
3845	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3846	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3847	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3848	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3849	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3850	///
3851	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
3852	#[inline]
3853	#[target_feature(enable = "avx512fp16")]
3854	#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = `8`))]
3855	#[rustc_legacy_const_generics(`4`)]
3856	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3857	pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
3858	src: __m128h,
3859	k: __mmask8,
3860	a: __m128h,
3861	b: __m128h,
3862	) -> __m128h {
3863	static_assert_rounding!(ROUNDING);
3864	_mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b)
3865	}
3866
3867	/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3868	/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3869	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3870	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3871	///
3872	/// Rounding is done according to the rounding parameter, which can be one of:
3873	///
3874	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3875	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3876	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3877	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3878	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3879	///
3880	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
3881	#[inline]
3882	#[target_feature(enable = "avx512fp16")]
3883	#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = `8`))]
3884	#[rustc_legacy_const_generics(`3`)]
3885	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3886	pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
3887	k: __mmask8,
3888	a: __m128h,
3889	b: __m128h,
3890	) -> __m128h {
3891	static_assert_rounding!(ROUNDING);
3892	_mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
3893	}
3894
3895	/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3896	/// the results in dst.
3897	///
3898	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
3899	#[inline]
3900	#[target_feature(enable = "avx512fp16,avx512vl")]
3901	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3902	pub fn _mm_abs_ph(v2: __m128h) -> __m128h {
3903	unsafe { transmute(src:_mm_and_si128(a:transmute(v2), b:_mm_set1_epi16(i16::MAX))) }
3904	}
3905
3906	/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3907	/// the result in dst.
3908	///
3909	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
3910	#[inline]
3911	#[target_feature(enable = "avx512fp16,avx512vl")]
3912	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3913	pub fn _mm256_abs_ph(v2: __m256h) -> __m256h {
3914	unsafe { transmute(src:_mm256_and_si256(a:transmute(v2), b:_mm256_set1_epi16(i16::MAX))) }
3915	}
3916
3917	/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3918	/// the result in dst.
3919	///
3920	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
3921	#[inline]
3922	#[target_feature(enable = "avx512fp16")]
3923	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3924	pub fn _mm512_abs_ph(v2: __m512h) -> __m512h {
3925	unsafe { transmute(src:_mm512_and_si512(a:transmute(v2), b:_mm512_set1_epi16(i16::MAX))) }
3926	}
3927
3928	/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex
3929	/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines
3930	/// the complex number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate*
3931	/// `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3932	///
3933	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
3934	#[inline]
3935	#[target_feature(enable = "avx512fp16,avx512vl")]
3936	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3937	pub fn _mm_conj_pch(a: __m128h) -> __m128h {
3938	unsafe { transmute(src:_mm_xor_si128(a:transmute(a), b:_mm_set1_epi32(i32::MIN))) }
3939	}
3940
3941	/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
3942	/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
3943	/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number
3944	/// `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3945	///
3946	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
3947	#[inline]
3948	#[target_feature(enable = "avx512fp16,avx512vl")]
3949	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3950	pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
3951	unsafe {
3952	let r: __m128 = transmute(src:_mm_conj_pch(a));
3953	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
3954	}
3955	}
3956
3957	/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
3958	/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
3959	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3960	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3961	///
3962	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
3963	#[inline]
3964	#[target_feature(enable = "avx512fp16,avx512vl")]
3965	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3966	pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
3967	_mm_mask_conj_pch(src:_mm_setzero_ph(), k, a)
3968	}
3969
3970	/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
3971	/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3972	/// number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
3973	///
3974	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
3975	#[inline]
3976	#[target_feature(enable = "avx512fp16,avx512vl")]
3977	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3978	pub fn _mm256_conj_pch(a: __m256h) -> __m256h {
3979	unsafe { transmute(src:_mm256_xor_si256(a:transmute(a), b:_mm256_set1_epi32(i32::MIN))) }
3980	}
3981
3982	/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
3983	/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
3984	/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
3985	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
3986	///
3987	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
3988	#[inline]
3989	#[target_feature(enable = "avx512fp16,avx512vl")]
3990	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3991	pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
3992	unsafe {
3993	let r: __m256 = transmute(src:_mm256_conj_pch(a));
3994	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
3995	}
3996	}
3997
3998	/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
3999	/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4000	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4001	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4002	///
4003	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
4004	#[inline]
4005	#[target_feature(enable = "avx512fp16,avx512vl")]
4006	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4007	pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
4008	_mm256_mask_conj_pch(src:_mm256_setzero_ph(), k, a)
4009	}
4010
4011	/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4012	/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4013	/// number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4014	///
4015	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
4016	#[inline]
4017	#[target_feature(enable = "avx512fp16")]
4018	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4019	pub fn _mm512_conj_pch(a: __m512h) -> __m512h {
4020	unsafe { transmute(src:_mm512_xor_si512(a:transmute(a), b:_mm512_set1_epi32(i32::MIN))) }
4021	}
4022
4023	/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4024	/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4025	/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4026	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4027	///
4028	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
4029	#[inline]
4030	#[target_feature(enable = "avx512fp16")]
4031	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4032	pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
4033	unsafe {
4034	let r: __m512 = transmute(src:_mm512_conj_pch(a));
4035	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
4036	}
4037	}
4038
4039	/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4040	/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4041	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4042	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4043	///
4044	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
4045	#[inline]
4046	#[target_feature(enable = "avx512fp16")]
4047	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4048	pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
4049	_mm512_mask_conj_pch(src:_mm512_setzero_ph(), k, a)
4050	}
4051
4052	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4053	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4054	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4055	///
4056	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
4057	#[inline]
4058	#[target_feature(enable = "avx512fp16,avx512vl")]
4059	#[cfg_attr(test, assert_instr(vfmaddcph))]
4060	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4061	pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4062	_mm_mask3_fmadd_pch(a, b, c, k:`0xff`)
4063	}
4064
4065	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4066	/// and store the results in dst using writemask k (the element is copied from a when the corresponding
4067	/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4068	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4069	///
4070	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
4071	#[inline]
4072	#[target_feature(enable = "avx512fp16,avx512vl")]
4073	#[cfg_attr(test, assert_instr(vfmaddcph))]
4074	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4075	pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4076	unsafe {
4077	let r: __m128 = transmute(src:_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4078	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4079	}
4080	}
4081
4082	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4083	/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4084	/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4085	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4086	///
4087	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
4088	#[inline]
4089	#[target_feature(enable = "avx512fp16,avx512vl")]
4090	#[cfg_attr(test, assert_instr(vfmaddcph))]
4091	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4092	pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4093	unsafe {
4094	transmute(src:vfmaddcph_mask3_128(
4095	a:transmute(a),
4096	b:transmute(b),
4097	c:transmute(src:c),
4098	k,
4099	))
4100	}
4101	}
4102
4103	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4104	/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4105	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4106	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4107	///
4108	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
4109	#[inline]
4110	#[target_feature(enable = "avx512fp16,avx512vl")]
4111	#[cfg_attr(test, assert_instr(vfmaddcph))]
4112	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4113	pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4114	unsafe {
4115	transmute(src:vfmaddcph_maskz_128(
4116	a:transmute(a),
4117	b:transmute(b),
4118	c:transmute(src:c),
4119	k,
4120	))
4121	}
4122	}
4123
4124	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4125	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4126	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4127	///
4128	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
4129	#[inline]
4130	#[target_feature(enable = "avx512fp16,avx512vl")]
4131	#[cfg_attr(test, assert_instr(vfmaddcph))]
4132	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4133	pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4134	_mm256_mask3_fmadd_pch(a, b, c, k:`0xff`)
4135	}
4136
4137	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4138	/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4139	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4140	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4141	///
4142	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
4143	#[inline]
4144	#[target_feature(enable = "avx512fp16,avx512vl")]
4145	#[cfg_attr(test, assert_instr(vfmaddcph))]
4146	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4147	pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4148	unsafe {
4149	let r: __m256 = transmute(src:_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4150	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4151	}
4152	}
4153
4154	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4155	/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4156	/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4157	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4158	///
4159	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
4160	#[inline]
4161	#[target_feature(enable = "avx512fp16,avx512vl")]
4162	#[cfg_attr(test, assert_instr(vfmaddcph))]
4163	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4164	pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4165	unsafe {
4166	transmute(src:vfmaddcph_mask3_256(
4167	a:transmute(a),
4168	b:transmute(b),
4169	c:transmute(src:c),
4170	k,
4171	))
4172	}
4173	}
4174
4175	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4176	/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4177	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4178	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4179	///
4180	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
4181	#[inline]
4182	#[target_feature(enable = "avx512fp16,avx512vl")]
4183	#[cfg_attr(test, assert_instr(vfmaddcph))]
4184	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4185	pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4186	unsafe {
4187	transmute(src:vfmaddcph_maskz_256(
4188	a:transmute(a),
4189	b:transmute(b),
4190	c:transmute(src:c),
4191	k,
4192	))
4193	}
4194	}
4195
4196	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4197	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4198	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4199	///
4200	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
4201	#[inline]
4202	#[target_feature(enable = "avx512fp16")]
4203	#[cfg_attr(test, assert_instr(vfmaddcph))]
4204	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4205	pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4206	_mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4207	}
4208
4209	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4210	/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4211	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4212	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4213	///
4214	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
4215	#[inline]
4216	#[target_feature(enable = "avx512fp16")]
4217	#[cfg_attr(test, assert_instr(vfmaddcph))]
4218	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4219	pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4220	_mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4221	}
4222
4223	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4224	/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4225	/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4226	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4227	///
4228	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
4229	#[inline]
4230	#[target_feature(enable = "avx512fp16")]
4231	#[cfg_attr(test, assert_instr(vfmaddcph))]
4232	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4233	pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4234	_mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4235	}
4236
4237	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4238	/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4239	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4240	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4241	///
4242	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
4243	#[inline]
4244	#[target_feature(enable = "avx512fp16")]
4245	#[cfg_attr(test, assert_instr(vfmaddcph))]
4246	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4247	pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4248	_mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4249	}
4250
4251	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4252	/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4253	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4254	///
4255	/// Rounding is done according to the rounding parameter, which can be one of:
4256	///
4257	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4258	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4259	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4260	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4261	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4262	///
4263	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
4264	#[inline]
4265	#[target_feature(enable = "avx512fp16")]
4266	#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = `8`))]
4267	#[rustc_legacy_const_generics(`3`)]
4268	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4269	pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4270	static_assert_rounding!(ROUNDING);
4271	_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k:`0xffff`)
4272	}
4273
4274	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4275	/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4276	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4277	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4278	///
4279	/// Rounding is done according to the rounding parameter, which can be one of:
4280	///
4281	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4282	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4283	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4284	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4285	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4286	///
4287	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
4288	#[inline]
4289	#[target_feature(enable = "avx512fp16")]
4290	#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = `8`))]
4291	#[rustc_legacy_const_generics(`4`)]
4292	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4293	pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
4294	a: __m512h,
4295	k: __mmask16,
4296	b: __m512h,
4297	c: __m512h,
4298	) -> __m512h {
4299	unsafe {
4300	static_assert_rounding!(ROUNDING);
4301	let r: __m512 = transmute(src:_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does
4302	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4303	}
4304	}
4305
4306	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4307	/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4308	/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4309	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4310	///
4311	/// Rounding is done according to the rounding parameter, which can be one of:
4312	///
4313	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4314	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4315	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4316	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4317	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4318	///
4319	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
4320	#[inline]
4321	#[target_feature(enable = "avx512fp16")]
4322	#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = `8`))]
4323	#[rustc_legacy_const_generics(`4`)]
4324	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4325	pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
4326	a: __m512h,
4327	b: __m512h,
4328	c: __m512h,
4329	k: __mmask16,
4330	) -> __m512h {
4331	unsafe {
4332	static_assert_rounding!(ROUNDING);
4333	transmute(src:vfmaddcph_mask3_512(
4334	a:transmute(a),
4335	b:transmute(b),
4336	c:transmute(src:c),
4337	k,
4338	ROUNDING,
4339	))
4340	}
4341	}
4342
4343	/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4344	/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4345	/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4346	/// elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4347	///
4348	/// Rounding is done according to the rounding parameter, which can be one of:
4349	///
4350	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4351	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4352	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4353	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4354	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4355	///
4356	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
4357	#[inline]
4358	#[target_feature(enable = "avx512fp16")]
4359	#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = `8`))]
4360	#[rustc_legacy_const_generics(`4`)]
4361	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4362	pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
4363	k: __mmask16,
4364	a: __m512h,
4365	b: __m512h,
4366	c: __m512h,
4367	) -> __m512h {
4368	unsafe {
4369	static_assert_rounding!(ROUNDING);
4370	transmute(src:vfmaddcph_maskz_512(
4371	a:transmute(a),
4372	b:transmute(b),
4373	c:transmute(src:c),
4374	k,
4375	ROUNDING,
4376	))
4377	}
4378	}
4379
4380	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4381	/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
4382	/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
4383	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4384	///
4385	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
4386	#[inline]
4387	#[target_feature(enable = "avx512fp16")]
4388	#[cfg_attr(test, assert_instr(vfmaddcsh))]
4389	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4390	pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4391	_mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4392	}
4393
4394	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4395	/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4396	/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4397	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4398	/// which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4399	///
4400	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
4401	#[inline]
4402	#[target_feature(enable = "avx512fp16")]
4403	#[cfg_attr(test, assert_instr(vfmaddcsh))]
4404	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4405	pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4406	_mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4407	}
4408
4409	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4410	/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4411	/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4412	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4413	/// which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4414	///
4415	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
4416	#[inline]
4417	#[target_feature(enable = "avx512fp16")]
4418	#[cfg_attr(test, assert_instr(vfmaddcsh))]
4419	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4420	pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4421	_mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4422	}
4423
4424	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4425	/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4426	/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4427	/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4428	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4429	///
4430	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
4431	#[inline]
4432	#[target_feature(enable = "avx512fp16")]
4433	#[cfg_attr(test, assert_instr(vfmaddcsh))]
4434	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4435	pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4436	_mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4437	}
4438
4439	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4440	/// store the result in the lower elements of dst. Each complex number is composed of two adjacent
4441	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4442	///
4443	/// Rounding is done according to the rounding parameter, which can be one of:
4444	///
4445	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4446	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4447	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4448	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4449	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4450	///
4451	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
4452	#[inline]
4453	#[target_feature(enable = "avx512fp16")]
4454	#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = `8`))]
4455	#[rustc_legacy_const_generics(`3`)]
4456	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4457	pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4458	unsafe {
4459	static_assert_rounding!(ROUNDING);
4460	transmute(src:vfmaddcsh_mask(
4461	a:transmute(a),
4462	b:transmute(b),
4463	c:transmute(c),
4464	k:`0xff`,
4465	ROUNDING,
4466	))
4467	}
4468	}
4469
4470	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4471	/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4472	/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4473	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4474	/// which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4475	///
4476	/// Rounding is done according to the rounding parameter, which can be one of:
4477	///
4478	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4479	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4480	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4481	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4482	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4483	///
4484	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
4485	#[inline]
4486	#[target_feature(enable = "avx512fp16")]
4487	#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = `8`))]
4488	#[rustc_legacy_const_generics(`4`)]
4489	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4490	pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
4491	a: __m128h,
4492	k: __mmask8,
4493	b: __m128h,
4494	c: __m128h,
4495	) -> __m128h {
4496	unsafe {
4497	static_assert_rounding!(ROUNDING);
4498	let a: __m128 = transmute(src:a);
4499	let r: __m128 = vfmaddcsh_mask(a, b:transmute(b), c:transmute(src:c), k, ROUNDING); // using `0xff` would have been fine here, but this is what CLang does
4500	transmute(src:_mm_mask_move_ss(src:a, k, a, b:r))
4501	}
4502	}
4503
4504	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4505	/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4506	/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4507	/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4508	/// which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4509	///
4510	/// Rounding is done according to the rounding parameter, which can be one of:
4511	///
4512	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4513	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4514	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4515	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4516	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4517	///
4518	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
4519	#[inline]
4520	#[target_feature(enable = "avx512fp16")]
4521	#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = `8`))]
4522	#[rustc_legacy_const_generics(`4`)]
4523	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4524	pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
4525	a: __m128h,
4526	b: __m128h,
4527	c: __m128h,
4528	k: __mmask8,
4529	) -> __m128h {
4530	unsafe {
4531	static_assert_rounding!(ROUNDING);
4532	let c: __m128 = transmute(src:c);
4533	let r: __m128 = vfmaddcsh_mask(a:transmute(a), b:transmute(src:b), c, k, ROUNDING);
4534	transmute(src:_mm_move_ss(a:c, b:r))
4535	}
4536	}
4537
4538	/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4539	/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4540	/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4541	/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4542	/// defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`.*
4543	///
4544	/// Rounding is done according to the rounding parameter, which can be one of:
4545	///
4546	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4547	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4548	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4549	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4550	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4551	///
4552	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
4553	#[inline]
4554	#[target_feature(enable = "avx512fp16")]
4555	#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = `8`))]
4556	#[rustc_legacy_const_generics(`4`)]
4557	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4558	pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
4559	k: __mmask8,
4560	a: __m128h,
4561	b: __m128h,
4562	c: __m128h,
4563	) -> __m128h {
4564	unsafe {
4565	static_assert_rounding!(ROUNDING);
4566	transmute(src:vfmaddcsh_maskz(
4567	a:transmute(a),
4568	b:transmute(b),
4569	c:transmute(src:c),
4570	k,
4571	ROUNDING,
4572	))
4573	}
4574	}
4575
4576	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4577	/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4578	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4579	/// `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4580	///
4581	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
4582	#[inline]
4583	#[target_feature(enable = "avx512fp16,avx512vl")]
4584	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4585	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4586	pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4587	_mm_mask3_fcmadd_pch(a, b, c, k:`0xff`)
4588	}
4589
4590	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4591	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4592	/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4593	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4594	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4595	///
4596	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
4597	#[inline]
4598	#[target_feature(enable = "avx512fp16,avx512vl")]
4599	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4600	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4601	pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4602	unsafe {
4603	let r: __m128 = transmute(src:_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4604	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4605	}
4606	}
4607
4608	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4609	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4610	/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4611	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4612	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4613	///
4614	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
4615	#[inline]
4616	#[target_feature(enable = "avx512fp16,avx512vl")]
4617	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4618	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4619	pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4620	unsafe {
4621	transmute(src:vfcmaddcph_mask3_128(
4622	a:transmute(a),
4623	b:transmute(b),
4624	c:transmute(src:c),
4625	k,
4626	))
4627	}
4628	}
4629
4630	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4631	/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4632	/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4633	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4634	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4635	///
4636	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
4637	#[inline]
4638	#[target_feature(enable = "avx512fp16,avx512vl")]
4639	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4640	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4641	pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4642	unsafe {
4643	transmute(src:vfcmaddcph_maskz_128(
4644	a:transmute(a),
4645	b:transmute(b),
4646	c:transmute(src:c),
4647	k,
4648	))
4649	}
4650	}
4651
4652	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4653	/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4654	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4655	/// `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4656	///
4657	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
4658	#[inline]
4659	#[target_feature(enable = "avx512fp16,avx512vl")]
4660	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4661	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4662	pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4663	_mm256_mask3_fcmadd_pch(a, b, c, k:`0xff`)
4664	}
4665
4666	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4667	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4668	/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4669	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4670	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4671	///
4672	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
4673	#[inline]
4674	#[target_feature(enable = "avx512fp16,avx512vl")]
4675	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4676	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4677	pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4678	unsafe {
4679	let r: __m256 = transmute(src:_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4680	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4681	}
4682	}
4683
4684	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4685	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4686	/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4687	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4688	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4689	///
4690	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
4691	#[inline]
4692	#[target_feature(enable = "avx512fp16,avx512vl")]
4693	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4694	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4695	pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4696	unsafe {
4697	transmute(src:vfcmaddcph_mask3_256(
4698	a:transmute(a),
4699	b:transmute(b),
4700	c:transmute(src:c),
4701	k,
4702	))
4703	}
4704	}
4705
4706	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4707	/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4708	/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4709	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4710	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4711	///
4712	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
4713	#[inline]
4714	#[target_feature(enable = "avx512fp16,avx512vl")]
4715	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4716	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4717	pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4718	unsafe {
4719	transmute(src:vfcmaddcph_maskz_256(
4720	a:transmute(a),
4721	b:transmute(b),
4722	c:transmute(src:c),
4723	k,
4724	))
4725	}
4726	}
4727
4728	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4729	/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4730	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4731	/// `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4732	///
4733	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
4734	#[inline]
4735	#[target_feature(enable = "avx512fp16")]
4736	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4737	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4738	pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4739	_mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4740	}
4741
4742	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4743	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4744	/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4745	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4746	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4747	///
4748	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
4749	#[inline]
4750	#[target_feature(enable = "avx512fp16")]
4751	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4752	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4753	pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4754	_mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4755	}
4756
4757	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4758	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4759	/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4760	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4761	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4762	///
4763	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
4764	#[inline]
4765	#[target_feature(enable = "avx512fp16")]
4766	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4767	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4768	pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4769	_mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4770	}
4771
4772	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4773	/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4774	/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4775	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4776	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4777	///
4778	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
4779	#[inline]
4780	#[target_feature(enable = "avx512fp16")]
4781	#[cfg_attr(test, assert_instr(vfcmaddcph))]
4782	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4783	pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4784	_mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4785	}
4786
4787	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4788	/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4789	/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4790	/// `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4791	///
4792	/// Rounding is done according to the rounding parameter, which can be one of:
4793	///
4794	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4795	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4796	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4797	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4798	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4799	///
4800	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
4801	#[inline]
4802	#[target_feature(enable = "avx512fp16")]
4803	#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = `8`))]
4804	#[rustc_legacy_const_generics(`3`)]
4805	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4806	pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4807	static_assert_rounding!(ROUNDING);
4808	_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k:`0xffff`)
4809	}
4810
4811	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4812	/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4813	/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4814	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4815	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4816	///
4817	/// Rounding is done according to the rounding parameter, which can be one of:
4818	///
4819	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4820	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4821	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4822	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4823	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4824	///
4825	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
4826	#[inline]
4827	#[target_feature(enable = "avx512fp16")]
4828	#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = `8`))]
4829	#[rustc_legacy_const_generics(`4`)]
4830	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4831	pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
4832	a: __m512h,
4833	k: __mmask16,
4834	b: __m512h,
4835	c: __m512h,
4836	) -> __m512h {
4837	unsafe {
4838	static_assert_rounding!(ROUNDING);
4839	let r: __m512 = transmute(src:_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does
4840	transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4841	}
4842	}
4843
4844	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4845	/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
4846	/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
4847	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1`, or the complex*
4848	/// conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4849	///
4850	/// Rounding is done according to the rounding parameter, which can be one of:
4851	///
4852	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4853	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4854	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4855	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4856	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4857	///
4858	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
4859	#[inline]
4860	#[target_feature(enable = "avx512fp16")]
4861	#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = `8`))]
4862	#[rustc_legacy_const_generics(`4`)]
4863	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4864	pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
4865	a: __m512h,
4866	b: __m512h,
4867	c: __m512h,
4868	k: __mmask16,
4869	) -> __m512h {
4870	unsafe {
4871	static_assert_rounding!(ROUNDING);
4872	transmute(src:vfcmaddcph_mask3_512(
4873	a:transmute(a),
4874	b:transmute(b),
4875	c:transmute(src:c),
4876	k,
4877	ROUNDING,
4878	))
4879	}
4880	}
4881
4882	/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4883	/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
4884	/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
4885	/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1`, or the complex*
4886	/// conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4887	///
4888	/// Rounding is done according to the rounding parameter, which can be one of:
4889	///
4890	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4891	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4892	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4893	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4894	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4895	///
4896	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
4897	#[inline]
4898	#[target_feature(enable = "avx512fp16")]
4899	#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = `8`))]
4900	#[rustc_legacy_const_generics(`4`)]
4901	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4902	pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
4903	k: __mmask16,
4904	a: __m512h,
4905	b: __m512h,
4906	c: __m512h,
4907	) -> __m512h {
4908	unsafe {
4909	static_assert_rounding!(ROUNDING);
4910	transmute(src:vfcmaddcph_maskz_512(
4911	a:transmute(a),
4912	b:transmute(b),
4913	c:transmute(src:c),
4914	k,
4915	ROUNDING,
4916	))
4917	}
4918	}
4919
4920	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4921	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
4922	/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
4923	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4924	/// number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4925	///
4926	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
4927	#[inline]
4928	#[target_feature(enable = "avx512fp16")]
4929	#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4930	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4931	pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4932	_mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4933	}
4934
4935	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4936	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
4937	/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
4938	/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
4939	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4940	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4941	///
4942	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
4943	#[inline]
4944	#[target_feature(enable = "avx512fp16")]
4945	#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4946	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4947	pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4948	_mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4949	}
4950
4951	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4952	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
4953	/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
4954	/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
4955	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4956	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4957	///
4958	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
4959	#[inline]
4960	#[target_feature(enable = "avx512fp16")]
4961	#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4962	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4963	pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4964	_mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4965	}
4966
4967	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4968	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
4969	/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper
4970	/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
4971	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
4972	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
4973	///
4974	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
4975	#[inline]
4976	#[target_feature(enable = "avx512fp16")]
4977	#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4978	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4979	pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4980	_mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4981	}
4982
4983	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4984	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
4985	/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
4986	/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4987	/// number `complex = vec.fp16[0] + i vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.*
4988	///
4989	/// Rounding is done according to the rounding parameter, which can be one of:
4990	///
4991	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4992	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4993	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4994	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4995	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4996	///
4997	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
4998	#[inline]
4999	#[target_feature(enable = "avx512fp16")]
5000	#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = `8`))]
5001	#[rustc_legacy_const_generics(`3`)]
5002	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5003	pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5004	unsafe {
5005	static_assert_rounding!(ROUNDING);
5006	transmute(src:vfcmaddcsh_mask(
5007	a:transmute(a),
5008	b:transmute(b),
5009	c:transmute(c),
5010	k:`0xff`,
5011	ROUNDING,
5012	))
5013	}
5014	}
5015
5016	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5017	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5018	/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
5019	/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5020	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
5021	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
5022	///
5023	/// Rounding is done according to the rounding parameter, which can be one of:
5024	///
5025	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5026	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5027	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5028	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5029	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5030	///
5031	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
5032	#[inline]
5033	#[target_feature(enable = "avx512fp16")]
5034	#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = `8`))]
5035	#[rustc_legacy_const_generics(`4`)]
5036	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5037	pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
5038	a: __m128h,
5039	k: __mmask8,
5040	b: __m128h,
5041	c: __m128h,
5042	) -> __m128h {
5043	unsafe {
5044	static_assert_rounding!(ROUNDING);
5045	let a: __m128 = transmute(src:a);
5046	let r: __m128 = vfcmaddcsh_mask(a, b:transmute(b), c:transmute(src:c), k, ROUNDING);
5047	transmute(src:_mm_mask_move_ss(src:a, k, a, b:r))
5048	}
5049	}
5050
5051	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5052	/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5053	/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
5054	/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5055	/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1]`,*
5056	/// or the complex conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
5057	///
5058	/// Rounding is done according to the rounding parameter, which can be one of:
5059	///
5060	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5061	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5062	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5063	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5064	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5065	///
5066	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
5067	#[inline]
5068	#[target_feature(enable = "avx512fp16")]
5069	#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = `8`))]
5070	#[rustc_legacy_const_generics(`4`)]
5071	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5072	pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
5073	a: __m128h,
5074	b: __m128h,
5075	c: __m128h,
5076	k: __mmask8,
5077	) -> __m128h {
5078	unsafe {
5079	static_assert_rounding!(ROUNDING);
5080	let c: __m128 = transmute(src:c);
5081	let r: __m128 = vfcmaddcsh_mask(a:transmute(a), b:transmute(src:b), c, k, ROUNDING);
5082	transmute(src:_mm_move_ss(a:c, b:r))
5083	}
5084	}
5085
5086	/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5087	/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
5088	/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
5089	/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
5090	/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i vec.fp16[1`, or the complex*
5091	/// conjugate `conjugate = vec.fp16[0] - i vec.fp16[1]`.*
5092	///
5093	/// Rounding is done according to the rounding parameter, which can be one of:
5094	///
5095	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5096	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5097	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5098	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5099	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5100	///
5101	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
5102	#[inline]
5103	#[target_feature(enable = "avx512fp16")]
5104	#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = `8`))]
5105	#[rustc_legacy_const_generics(`4`)]
5106	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5107	pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
5108	k: __mmask8,
5109	a: __m128h,
5110	b: __m128h,
5111	c: __m128h,
5112	) -> __m128h {
5113	unsafe {
5114	static_assert_rounding!(ROUNDING);
5115	transmute(src:vfcmaddcsh_maskz(
5116	a:transmute(a),
5117	b:transmute(b),
5118	c:transmute(src:c),
5119	k,
5120	ROUNDING,
5121	))
5122	}
5123	}
5124
5125	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5126	/// result to packed elements in c, and store the results in dst.
5127	///
5128	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
5129	#[inline]
5130	#[target_feature(enable = "avx512fp16,avx512vl")]
5131	#[cfg_attr(test, assert_instr(vfmadd))]
5132	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5133	pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5134	unsafe { simd_fma(x:a, y:b, z:c) }
5135	}
5136
5137	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5138	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5139	/// from a when the corresponding mask bit is not set).
5140	///
5141	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
5142	#[inline]
5143	#[target_feature(enable = "avx512fp16,avx512vl")]
5144	#[cfg_attr(test, assert_instr(vfmadd))]
5145	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5146	pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5147	unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:a) }
5148	}
5149
5150	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5151	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5152	/// from c when the corresponding mask bit is not set).
5153	///
5154	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
5155	#[inline]
5156	#[target_feature(enable = "avx512fp16,avx512vl")]
5157	#[cfg_attr(test, assert_instr(vfmadd))]
5158	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5159	pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5160	unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:c) }
5161	}
5162
5163	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5164	/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5165	/// out when the corresponding mask bit is not set).
5166	///
5167	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
5168	#[inline]
5169	#[target_feature(enable = "avx512fp16,avx512vl")]
5170	#[cfg_attr(test, assert_instr(vfmadd))]
5171	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5172	pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5173	unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:_mm_setzero_ph()) }
5174	}
5175
5176	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5177	/// result to packed elements in c, and store the results in dst.
5178	///
5179	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
5180	#[inline]
5181	#[target_feature(enable = "avx512fp16,avx512vl")]
5182	#[cfg_attr(test, assert_instr(vfmadd))]
5183	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5184	pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5185	unsafe { simd_fma(x:a, y:b, z:c) }
5186	}
5187
5188	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5189	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5190	/// from a when the corresponding mask bit is not set).
5191	///
5192	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
5193	#[inline]
5194	#[target_feature(enable = "avx512fp16,avx512vl")]
5195	#[cfg_attr(test, assert_instr(vfmadd))]
5196	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5197	pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
5198	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:a) }
5199	}
5200
5201	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5202	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5203	/// from c when the corresponding mask bit is not set).
5204	///
5205	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
5206	#[inline]
5207	#[target_feature(enable = "avx512fp16,avx512vl")]
5208	#[cfg_attr(test, assert_instr(vfmadd))]
5209	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5210	pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
5211	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:c) }
5212	}
5213
5214	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5215	/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5216	/// out when the corresponding mask bit is not set).
5217	///
5218	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
5219	#[inline]
5220	#[target_feature(enable = "avx512fp16,avx512vl")]
5221	#[cfg_attr(test, assert_instr(vfmadd))]
5222	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5223	pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5224	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:_mm256_setzero_ph()) }
5225	}
5226
5227	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5228	/// result to packed elements in c, and store the results in dst.
5229	///
5230	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
5231	#[inline]
5232	#[target_feature(enable = "avx512fp16")]
5233	#[cfg_attr(test, assert_instr(vfmadd))]
5234	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5235	pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5236	unsafe { simd_fma(x:a, y:b, z:c) }
5237	}
5238
5239	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5240	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5241	/// from a when the corresponding mask bit is not set).
5242	///
5243	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
5244	#[inline]
5245	#[target_feature(enable = "avx512fp16")]
5246	#[cfg_attr(test, assert_instr(vfmadd))]
5247	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5248	pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
5249	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:a) }
5250	}
5251
5252	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5253	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5254	/// from c when the corresponding mask bit is not set).
5255	///
5256	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
5257	#[inline]
5258	#[target_feature(enable = "avx512fp16")]
5259	#[cfg_attr(test, assert_instr(vfmadd))]
5260	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5261	pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
5262	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:c) }
5263	}
5264
5265	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5266	/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5267	/// out when the corresponding mask bit is not set).
5268	///
5269	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
5270	#[inline]
5271	#[target_feature(enable = "avx512fp16")]
5272	#[cfg_attr(test, assert_instr(vfmadd))]
5273	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5274	pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5275	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:_mm512_setzero_ph()) }
5276	}
5277
5278	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5279	/// result to packed elements in c, and store the results in dst.
5280	///
5281	/// Rounding is done according to the rounding parameter, which can be one of:
5282	///
5283	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5284	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5285	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5286	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5287	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5288	///
5289	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
5290	#[inline]
5291	#[target_feature(enable = "avx512fp16")]
5292	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5293	#[rustc_legacy_const_generics(`3`)]
5294	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5295	pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5296	unsafe {
5297	static_assert_rounding!(ROUNDING);
5298	vfmaddph_512(a, b, c, ROUNDING)
5299	}
5300	}
5301
5302	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5303	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5304	/// from a when the corresponding mask bit is not set).
5305	///
5306	/// Rounding is done according to the rounding parameter, which can be one of:
5307	///
5308	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5309	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5310	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5311	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5312	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5313	///
5314	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
5315	#[inline]
5316	#[target_feature(enable = "avx512fp16")]
5317	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5318	#[rustc_legacy_const_generics(`4`)]
5319	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5320	pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
5321	a: __m512h,
5322	k: __mmask32,
5323	b: __m512h,
5324	c: __m512h,
5325	) -> __m512h {
5326	unsafe {
5327	static_assert_rounding!(ROUNDING);
5328	simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:a)
5329	}
5330	}
5331
5332	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5333	/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5334	/// from c when the corresponding mask bit is not set).
5335	///
5336	/// Rounding is done according to the rounding parameter, which can be one of:
5337	///
5338	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5339	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5340	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5341	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5342	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5343	///
5344	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
5345	#[inline]
5346	#[target_feature(enable = "avx512fp16")]
5347	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5348	#[rustc_legacy_const_generics(`4`)]
5349	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5350	pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
5351	a: __m512h,
5352	b: __m512h,
5353	c: __m512h,
5354	k: __mmask32,
5355	) -> __m512h {
5356	unsafe {
5357	static_assert_rounding!(ROUNDING);
5358	simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:c)
5359	}
5360	}
5361
5362	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5363	/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5364	/// out when the corresponding mask bit is not set).
5365	///
5366	/// Rounding is done according to the rounding parameter, which can be one of:
5367	///
5368	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5369	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5370	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5371	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5372	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5373	///
5374	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
5375	#[inline]
5376	#[target_feature(enable = "avx512fp16")]
5377	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5378	#[rustc_legacy_const_generics(`4`)]
5379	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5380	pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
5381	k: __mmask32,
5382	a: __m512h,
5383	b: __m512h,
5384	c: __m512h,
5385	) -> __m512h {
5386	unsafe {
5387	static_assert_rounding!(ROUNDING);
5388	simd_select_bitmask(
5389	m:k,
5390	yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
5391	no:_mm512_setzero_ph(),
5392	)
5393	}
5394	}
5395
5396	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5397	/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5398	/// 7 packed elements from a to the upper elements of dst.
5399	///
5400	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
5401	#[inline]
5402	#[target_feature(enable = "avx512fp16")]
5403	#[cfg_attr(test, assert_instr(vfmadd))]
5404	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5405	pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5406	unsafe {
5407	let extracta: f16 = simd_extract!(a, `0`);
5408	let extractb: f16 = simd_extract!(b, `0`);
5409	let extractc: f16 = simd_extract!(c, `0`);
5410	let r: f16 = fmaf16(a:extracta, b:extractb, c:extractc);
5411	simd_insert!(a, `0`, r)
5412	}
5413	}
5414
5415	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5416	/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5417	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5418	/// upper elements of dst.
5419	///
5420	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
5421	#[inline]
5422	#[target_feature(enable = "avx512fp16")]
5423	#[cfg_attr(test, assert_instr(vfmadd))]
5424	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5425	pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5426	unsafe {
5427	let mut fmadd: f16 = simd_extract!(a, `0`);
5428	if k & `1` != `0` {
5429	let extractb: f16 = simd_extract!(b, `0`);
5430	let extractc: f16 = simd_extract!(c, `0`);
5431	fmadd = fmaf16(a:fmadd, b:extractb, c:extractc);
5432	}
5433	simd_insert!(a, `0`, fmadd)
5434	}
5435	}
5436
5437	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5438	/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5439	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5440	/// upper elements of dst.
5441	///
5442	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
5443	#[inline]
5444	#[target_feature(enable = "avx512fp16")]
5445	#[cfg_attr(test, assert_instr(vfmadd))]
5446	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5447	pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5448	unsafe {
5449	let mut fmadd: f16 = simd_extract!(c, `0`);
5450	if k & `1` != `0` {
5451	let extracta: f16 = simd_extract!(a, `0`);
5452	let extractb: f16 = simd_extract!(b, `0`);
5453	fmadd = fmaf16(a:extracta, b:extractb, c:fmadd);
5454	}
5455	simd_insert!(c, `0`, fmadd)
5456	}
5457	}
5458
5459	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5460	/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5461	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5462	/// upper elements of dst.
5463	///
5464	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
5465	#[inline]
5466	#[target_feature(enable = "avx512fp16")]
5467	#[cfg_attr(test, assert_instr(vfmadd))]
5468	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5469	pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5470	unsafe {
5471	let mut fmadd: f16 = `0.0`;
5472	if k & `1` != `0` {
5473	let extracta: f16 = simd_extract!(a, `0`);
5474	let extractb: f16 = simd_extract!(b, `0`);
5475	let extractc: f16 = simd_extract!(c, `0`);
5476	fmadd = fmaf16(a:extracta, b:extractb, c:extractc);
5477	}
5478	simd_insert!(a, `0`, fmadd)
5479	}
5480	}
5481
5482	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5483	/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5484	/// 7 packed elements from a to the upper elements of dst.
5485	///
5486	/// Rounding is done according to the rounding parameter, which can be one of:
5487	///
5488	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5489	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5490	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5491	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5492	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5493	///
5494	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
5495	#[inline]
5496	#[target_feature(enable = "avx512fp16")]
5497	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5498	#[rustc_legacy_const_generics(`3`)]
5499	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5500	pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5501	unsafe {
5502	static_assert_rounding!(ROUNDING);
5503	let extracta: f16 = simd_extract!(a, `0`);
5504	let extractb: f16 = simd_extract!(b, `0`);
5505	let extractc: f16 = simd_extract!(c, `0`);
5506	let r: f16 = vfmaddsh(a:extracta, b:extractb, c:extractc, ROUNDING);
5507	simd_insert!(a, `0`, r)
5508	}
5509	}
5510
5511	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5512	/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5513	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5514	/// upper elements of dst.
5515	///
5516	/// Rounding is done according to the rounding parameter, which can be one of:
5517	///
5518	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5519	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5520	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5521	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5522	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5523	///
5524	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
5525	#[inline]
5526	#[target_feature(enable = "avx512fp16")]
5527	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5528	#[rustc_legacy_const_generics(`4`)]
5529	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5530	pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
5531	a: __m128h,
5532	k: __mmask8,
5533	b: __m128h,
5534	c: __m128h,
5535	) -> __m128h {
5536	unsafe {
5537	static_assert_rounding!(ROUNDING);
5538	let mut fmadd: f16 = simd_extract!(a, `0`);
5539	if k & `1` != `0` {
5540	let extractb: f16 = simd_extract!(b, `0`);
5541	let extractc: f16 = simd_extract!(c, `0`);
5542	fmadd = vfmaddsh(a:fmadd, b:extractb, c:extractc, ROUNDING);
5543	}
5544	simd_insert!(a, `0`, fmadd)
5545	}
5546	}
5547
5548	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5549	/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5550	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5551	/// upper elements of dst.
5552	///
5553	/// Rounding is done according to the rounding parameter, which can be one of:
5554	///
5555	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5556	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5557	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5558	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5559	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560	///
5561	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
5562	#[inline]
5563	#[target_feature(enable = "avx512fp16")]
5564	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5565	#[rustc_legacy_const_generics(`4`)]
5566	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5567	pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
5568	a: __m128h,
5569	b: __m128h,
5570	c: __m128h,
5571	k: __mmask8,
5572	) -> __m128h {
5573	unsafe {
5574	static_assert_rounding!(ROUNDING);
5575	let mut fmadd: f16 = simd_extract!(c, `0`);
5576	if k & `1` != `0` {
5577	let extracta: f16 = simd_extract!(a, `0`);
5578	let extractb: f16 = simd_extract!(b, `0`);
5579	fmadd = vfmaddsh(a:extracta, b:extractb, c:fmadd, ROUNDING);
5580	}
5581	simd_insert!(c, `0`, fmadd)
5582	}
5583	}
5584
5585	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5586	/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5587	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5588	/// upper elements of dst.
5589	///
5590	/// Rounding is done according to the rounding parameter, which can be one of:
5591	///
5592	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5593	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5594	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5595	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5596	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5597	///
5598	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
5599	#[inline]
5600	#[target_feature(enable = "avx512fp16")]
5601	#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = `8`))]
5602	#[rustc_legacy_const_generics(`4`)]
5603	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5604	pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
5605	k: __mmask8,
5606	a: __m128h,
5607	b: __m128h,
5608	c: __m128h,
5609	) -> __m128h {
5610	unsafe {
5611	static_assert_rounding!(ROUNDING);
5612	let mut fmadd: f16 = `0.0`;
5613	if k & `1` != `0` {
5614	let extracta: f16 = simd_extract!(a, `0`);
5615	let extractb: f16 = simd_extract!(b, `0`);
5616	let extractc: f16 = simd_extract!(c, `0`);
5617	fmadd = vfmaddsh(a:extracta, b:extractb, c:extractc, ROUNDING);
5618	}
5619	simd_insert!(a, `0`, fmadd)
5620	}
5621	}
5622
5623	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5624	/// in c from the intermediate result, and store the results in dst.
5625	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5626	///
5627	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
5628	#[inline]
5629	#[target_feature(enable = "avx512fp16,avx512vl")]
5630	#[cfg_attr(test, assert_instr(vfmsub))]
5631	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5632	pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5633	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5634	}
5635
5636	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5637	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5638	/// from a when the corresponding mask bit is not set).
5639	///
5640	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
5641	#[inline]
5642	#[target_feature(enable = "avx512fp16,avx512vl")]
5643	#[cfg_attr(test, assert_instr(vfmsub))]
5644	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5645	pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5646	unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:a) }
5647	}
5648
5649	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5650	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5651	/// from c when the corresponding mask bit is not set).
5652	///
5653	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
5654	#[inline]
5655	#[target_feature(enable = "avx512fp16,avx512vl")]
5656	#[cfg_attr(test, assert_instr(vfmsub))]
5657	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5658	pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5659	unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:c) }
5660	}
5661
5662	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5663	/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5664	/// out when the corresponding mask bit is not set).
5665	///
5666	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
5667	#[inline]
5668	#[target_feature(enable = "avx512fp16,avx512vl")]
5669	#[cfg_attr(test, assert_instr(vfmsub))]
5670	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5671	pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5672	unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:_mm_setzero_ph()) }
5673	}
5674
5675	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5676	/// in c from the intermediate result, and store the results in dst.
5677	///
5678	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
5679	#[inline]
5680	#[target_feature(enable = "avx512fp16,avx512vl")]
5681	#[cfg_attr(test, assert_instr(vfmsub))]
5682	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5683	pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5684	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5685	}
5686
5687	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5688	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5689	/// from a when the corresponding mask bit is not set).
5690	///
5691	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
5692	#[inline]
5693	#[target_feature(enable = "avx512fp16,avx512vl")]
5694	#[cfg_attr(test, assert_instr(vfmsub))]
5695	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5696	pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
5697	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:a) }
5698	}
5699
5700	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5701	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5702	/// from c when the corresponding mask bit is not set).
5703	///
5704	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
5705	#[inline]
5706	#[target_feature(enable = "avx512fp16,avx512vl")]
5707	#[cfg_attr(test, assert_instr(vfmsub))]
5708	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5709	pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
5710	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:c) }
5711	}
5712
5713	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5714	/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5715	/// out when the corresponding mask bit is not set).
5716	///
5717	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
5718	#[inline]
5719	#[target_feature(enable = "avx512fp16,avx512vl")]
5720	#[cfg_attr(test, assert_instr(vfmsub))]
5721	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5722	pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5723	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:_mm256_setzero_ph()) }
5724	}
5725
5726	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5727	/// in c from the intermediate result, and store the results in dst.
5728	///
5729	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
5730	#[inline]
5731	#[target_feature(enable = "avx512fp16")]
5732	#[cfg_attr(test, assert_instr(vfmsub))]
5733	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5734	pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5735	unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5736	}
5737
5738	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5739	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5740	/// from a when the corresponding mask bit is not set).
5741	///
5742	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
5743	#[inline]
5744	#[target_feature(enable = "avx512fp16")]
5745	#[cfg_attr(test, assert_instr(vfmsub))]
5746	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5747	pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
5748	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:a) }
5749	}
5750
5751	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5752	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5753	/// from c when the corresponding mask bit is not set).
5754	///
5755	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
5756	#[inline]
5757	#[target_feature(enable = "avx512fp16")]
5758	#[cfg_attr(test, assert_instr(vfmsub))]
5759	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5760	pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
5761	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:c) }
5762	}
5763
5764	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5765	/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5766	/// out when the corresponding mask bit is not set).
5767	///
5768	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
5769	#[inline]
5770	#[target_feature(enable = "avx512fp16")]
5771	#[cfg_attr(test, assert_instr(vfmsub))]
5772	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5773	pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5774	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:_mm512_setzero_ph()) }
5775	}
5776
5777	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5778	/// in c from the intermediate result, and store the results in dst.
5779	///
5780	/// Rounding is done according to the rounding parameter, which can be one of:
5781	///
5782	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5783	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5784	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5785	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5786	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5787	///
5788	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
5789	#[inline]
5790	#[target_feature(enable = "avx512fp16")]
5791	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
5792	#[rustc_legacy_const_generics(`3`)]
5793	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5794	pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5795	unsafe {
5796	static_assert_rounding!(ROUNDING);
5797	vfmaddph_512(a, b, c:simd_neg(c), ROUNDING)
5798	}
5799	}
5800
5801	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5802	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5803	/// from a when the corresponding mask bit is not set).
5804	///
5805	/// Rounding is done according to the rounding parameter, which can be one of:
5806	///
5807	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5808	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5809	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5810	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5811	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5812	///
5813	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
5814	#[inline]
5815	#[target_feature(enable = "avx512fp16")]
5816	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
5817	#[rustc_legacy_const_generics(`4`)]
5818	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5819	pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
5820	a: __m512h,
5821	k: __mmask32,
5822	b: __m512h,
5823	c: __m512h,
5824	) -> __m512h {
5825	unsafe {
5826	static_assert_rounding!(ROUNDING);
5827	simd_select_bitmask(m:k, yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c), no:a)
5828	}
5829	}
5830
5831	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5832	/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5833	/// from c when the corresponding mask bit is not set).
5834	///
5835	/// Rounding is done according to the rounding parameter, which can be one of:
5836	///
5837	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5838	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5839	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5840	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5841	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5842	///
5843	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
5844	#[inline]
5845	#[target_feature(enable = "avx512fp16")]
5846	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
5847	#[rustc_legacy_const_generics(`4`)]
5848	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5849	pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
5850	a: __m512h,
5851	b: __m512h,
5852	c: __m512h,
5853	k: __mmask32,
5854	) -> __m512h {
5855	unsafe {
5856	static_assert_rounding!(ROUNDING);
5857	simd_select_bitmask(m:k, yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c), no:c)
5858	}
5859	}
5860
5861	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5862	/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5863	/// out when the corresponding mask bit is not set).
5864	///
5865	/// Rounding is done according to the rounding parameter, which can be one of:
5866	///
5867	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5868	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5869	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5870	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5871	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872	///
5873	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
5874	#[inline]
5875	#[target_feature(enable = "avx512fp16")]
5876	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
5877	#[rustc_legacy_const_generics(`4`)]
5878	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5879	pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
5880	k: __mmask32,
5881	a: __m512h,
5882	b: __m512h,
5883	c: __m512h,
5884	) -> __m512h {
5885	unsafe {
5886	static_assert_rounding!(ROUNDING);
5887	simd_select_bitmask(
5888	m:k,
5889	yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
5890	no:_mm512_setzero_ph(),
5891	)
5892	}
5893	}
5894
5895	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5896	/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5897	/// 7 packed elements from a to the upper elements of dst.
5898	///
5899	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
5900	#[inline]
5901	#[target_feature(enable = "avx512fp16")]
5902	#[cfg_attr(test, assert_instr(vfmsub))]
5903	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5904	pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5905	unsafe {
5906	let extracta: f16 = simd_extract!(a, `0`);
5907	let extractb: f16 = simd_extract!(b, `0`);
5908	let extractc: f16 = simd_extract!(c, `0`);
5909	let r: f16 = fmaf16(a:extracta, b:extractb, -extractc);
5910	simd_insert!(a, `0`, r)
5911	}
5912	}
5913
5914	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5915	/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5916	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5917	/// upper elements of dst.
5918	///
5919	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
5920	#[inline]
5921	#[target_feature(enable = "avx512fp16")]
5922	#[cfg_attr(test, assert_instr(vfmsub))]
5923	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5924	pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5925	unsafe {
5926	let mut fmsub: f16 = simd_extract!(a, `0`);
5927	if k & `1` != `0` {
5928	let extractb: f16 = simd_extract!(b, `0`);
5929	let extractc: f16 = simd_extract!(c, `0`);
5930	fmsub = fmaf16(a:fmsub, b:extractb, -extractc);
5931	}
5932	simd_insert!(a, `0`, fmsub)
5933	}
5934	}
5935
5936	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5937	/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5938	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5939	/// upper elements of dst.
5940	///
5941	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
5942	#[inline]
5943	#[target_feature(enable = "avx512fp16")]
5944	#[cfg_attr(test, assert_instr(vfmsub))]
5945	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5946	pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5947	unsafe {
5948	let mut fmsub: f16 = simd_extract!(c, `0`);
5949	if k & `1` != `0` {
5950	let extracta: f16 = simd_extract!(a, `0`);
5951	let extractb: f16 = simd_extract!(b, `0`);
5952	fmsub = fmaf16(a:extracta, b:extractb, -fmsub);
5953	}
5954	simd_insert!(c, `0`, fmsub)
5955	}
5956	}
5957
5958	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5959	/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
5960	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5961	/// upper elements of dst.
5962	///
5963	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
5964	#[inline]
5965	#[target_feature(enable = "avx512fp16")]
5966	#[cfg_attr(test, assert_instr(vfmsub))]
5967	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5968	pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5969	unsafe {
5970	let mut fmsub: f16 = `0.0`;
5971	if k & `1` != `0` {
5972	let extracta: f16 = simd_extract!(a, `0`);
5973	let extractb: f16 = simd_extract!(b, `0`);
5974	let extractc: f16 = simd_extract!(c, `0`);
5975	fmsub = fmaf16(a:extracta, b:extractb, -extractc);
5976	}
5977	simd_insert!(a, `0`, fmsub)
5978	}
5979	}
5980
5981	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5982	/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5983	/// 7 packed elements from a to the upper elements of dst.
5984	///
5985	/// Rounding is done according to the rounding parameter, which can be one of:
5986	///
5987	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5988	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5989	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5990	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5991	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5992	///
5993	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
5994	#[inline]
5995	#[target_feature(enable = "avx512fp16")]
5996	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
5997	#[rustc_legacy_const_generics(`3`)]
5998	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5999	pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6000	unsafe {
6001	static_assert_rounding!(ROUNDING);
6002	let extracta: f16 = simd_extract!(a, `0`);
6003	let extractb: f16 = simd_extract!(b, `0`);
6004	let extractc: f16 = simd_extract!(c, `0`);
6005	let r: f16 = vfmaddsh(a:extracta, b:extractb, -extractc, ROUNDING);
6006	simd_insert!(a, `0`, r)
6007	}
6008	}
6009
6010	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6011	/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6012	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6013	/// upper elements of dst.
6014	///
6015	/// Rounding is done according to the rounding parameter, which can be one of:
6016	///
6017	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6018	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6019	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6020	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6021	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6022	///
6023	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
6024	#[inline]
6025	#[target_feature(enable = "avx512fp16")]
6026	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
6027	#[rustc_legacy_const_generics(`4`)]
6028	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6029	pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
6030	a: __m128h,
6031	k: __mmask8,
6032	b: __m128h,
6033	c: __m128h,
6034	) -> __m128h {
6035	unsafe {
6036	static_assert_rounding!(ROUNDING);
6037	let mut fmsub: f16 = simd_extract!(a, `0`);
6038	if k & `1` != `0` {
6039	let extractb: f16 = simd_extract!(b, `0`);
6040	let extractc: f16 = simd_extract!(c, `0`);
6041	fmsub = vfmaddsh(a:fmsub, b:extractb, -extractc, ROUNDING);
6042	}
6043	simd_insert!(a, `0`, fmsub)
6044	}
6045	}
6046
6047	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6048	/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6049	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
6050	/// upper elements of dst.
6051	///
6052	/// Rounding is done according to the rounding parameter, which can be one of:
6053	///
6054	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6055	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6056	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6057	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6058	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6059	///
6060	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
6061	#[inline]
6062	#[target_feature(enable = "avx512fp16")]
6063	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
6064	#[rustc_legacy_const_generics(`4`)]
6065	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6066	pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
6067	a: __m128h,
6068	b: __m128h,
6069	c: __m128h,
6070	k: __mmask8,
6071	) -> __m128h {
6072	unsafe {
6073	static_assert_rounding!(ROUNDING);
6074	let mut fmsub: f16 = simd_extract!(c, `0`);
6075	if k & `1` != `0` {
6076	let extracta: f16 = simd_extract!(a, `0`);
6077	let extractb: f16 = simd_extract!(b, `0`);
6078	fmsub = vfmaddsh(a:extracta, b:extractb, -fmsub, ROUNDING);
6079	}
6080	simd_insert!(c, `0`, fmsub)
6081	}
6082	}
6083
6084	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6085	/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
6086	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6087	/// upper elements of dst.
6088	///
6089	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
6090	#[inline]
6091	#[target_feature(enable = "avx512fp16")]
6092	#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = `8`))]
6093	#[rustc_legacy_const_generics(`4`)]
6094	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6095	pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
6096	k: __mmask8,
6097	a: __m128h,
6098	b: __m128h,
6099	c: __m128h,
6100	) -> __m128h {
6101	unsafe {
6102	static_assert_rounding!(ROUNDING);
6103	let mut fmsub: f16 = `0.0`;
6104	if k & `1` != `0` {
6105	let extracta: f16 = simd_extract!(a, `0`);
6106	let extractb: f16 = simd_extract!(b, `0`);
6107	let extractc: f16 = simd_extract!(c, `0`);
6108	fmsub = vfmaddsh(a:extracta, b:extractb, -extractc, ROUNDING);
6109	}
6110	simd_insert!(a, `0`, fmsub)
6111	}
6112	}
6113
6114	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6115	/// result from packed elements in c, and store the results in dst.
6116	///
6117	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
6118	#[inline]
6119	#[target_feature(enable = "avx512fp16,avx512vl")]
6120	#[cfg_attr(test, assert_instr(vfnmadd))]
6121	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6122	pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6123	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6124	}
6125
6126	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6127	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6128	/// from a when the corresponding mask bit is not set).
6129	///
6130	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
6131	#[inline]
6132	#[target_feature(enable = "avx512fp16,avx512vl")]
6133	#[cfg_attr(test, assert_instr(vfnmadd))]
6134	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6135	pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6136	unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:a) }
6137	}
6138
6139	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6140	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6141	/// from c when the corresponding mask bit is not set).
6142	///
6143	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
6144	#[inline]
6145	#[target_feature(enable = "avx512fp16,avx512vl")]
6146	#[cfg_attr(test, assert_instr(vfnmadd))]
6147	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6148	pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6149	unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:c) }
6150	}
6151
6152	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6153	/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6154	/// out when the corresponding mask bit is not set).
6155	///
6156	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
6157	#[inline]
6158	#[target_feature(enable = "avx512fp16,avx512vl")]
6159	#[cfg_attr(test, assert_instr(vfnmadd))]
6160	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6161	pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6162	unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:_mm_setzero_ph()) }
6163	}
6164
6165	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6166	/// result from packed elements in c, and store the results in dst.
6167	///
6168	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
6169	#[inline]
6170	#[target_feature(enable = "avx512fp16,avx512vl")]
6171	#[cfg_attr(test, assert_instr(vfnmadd))]
6172	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6173	pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6174	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6175	}
6176
6177	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6178	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6179	/// from a when the corresponding mask bit is not set).
6180	///
6181	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
6182	#[inline]
6183	#[target_feature(enable = "avx512fp16,avx512vl")]
6184	#[cfg_attr(test, assert_instr(vfnmadd))]
6185	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6186	pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
6187	unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ph(a, b, c), no:a) }
6188	}
6189
6190	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6191	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6192	/// from c when the corresponding mask bit is not set).
6193	///
6194	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
6195	#[inline]
6196	#[target_feature(enable = "avx512fp16,avx512vl")]
6197	#[cfg_attr(test, assert_instr(vfnmadd))]
6198	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6199	pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
6200	unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ph(a, b, c), no:c) }
6201	}
6202
6203	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6204	/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6205	/// out when the corresponding mask bit is not set).
6206	///
6207	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
6208	#[inline]
6209	#[target_feature(enable = "avx512fp16,avx512vl")]
6210	#[cfg_attr(test, assert_instr(vfnmadd))]
6211	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6212	pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6213	unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ph(a, b, c), no:_mm256_setzero_ph()) }
6214	}
6215
6216	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6217	/// result from packed elements in c, and store the results in dst.
6218	///
6219	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
6220	#[inline]
6221	#[target_feature(enable = "avx512fp16")]
6222	#[cfg_attr(test, assert_instr(vfnmadd))]
6223	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6224	pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6225	unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6226	}
6227
6228	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6229	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6230	/// from a when the corresponding mask bit is not set).
6231	///
6232	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
6233	#[inline]
6234	#[target_feature(enable = "avx512fp16")]
6235	#[cfg_attr(test, assert_instr(vfnmadd))]
6236	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6237	pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
6238	unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ph(a, b, c), no:a) }
6239	}
6240
6241	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6242	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6243	/// from c when the corresponding mask bit is not set).
6244	///
6245	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
6246	#[inline]
6247	#[target_feature(enable = "avx512fp16")]
6248	#[cfg_attr(test, assert_instr(vfnmadd))]
6249	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6250	pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
6251	unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ph(a, b, c), no:c) }
6252	}
6253
6254	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6255	/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6256	/// out when the corresponding mask bit is not set).
6257	///
6258	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
6259	#[inline]
6260	#[target_feature(enable = "avx512fp16")]
6261	#[cfg_attr(test, assert_instr(vfnmadd))]
6262	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6263	pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6264	unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ph(a, b, c), no:_mm512_setzero_ph()) }
6265	}
6266
6267	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6268	/// result from packed elements in c, and store the results in dst.
6269	///
6270	/// Rounding is done according to the rounding parameter, which can be one of:
6271	///
6272	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6273	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6274	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6275	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6276	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6277	///
6278	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
6279	#[inline]
6280	#[target_feature(enable = "avx512fp16")]
6281	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6282	#[rustc_legacy_const_generics(`3`)]
6283	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6284	pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6285	unsafe {
6286	static_assert_rounding!(ROUNDING);
6287	vfmaddph_512(a:simd_neg(a), b, c, ROUNDING)
6288	}
6289	}
6290
6291	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6292	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6293	/// from a when the corresponding mask bit is not set).
6294	///
6295	/// Rounding is done according to the rounding parameter, which can be one of:
6296	///
6297	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6298	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6299	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6300	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6301	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6302	///
6303	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
6304	#[inline]
6305	#[target_feature(enable = "avx512fp16")]
6306	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6307	#[rustc_legacy_const_generics(`4`)]
6308	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6309	pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
6310	a: __m512h,
6311	k: __mmask32,
6312	b: __m512h,
6313	c: __m512h,
6314	) -> __m512h {
6315	unsafe {
6316	static_assert_rounding!(ROUNDING);
6317	simd_select_bitmask(m:k, yes:_mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), no:a)
6318	}
6319	}
6320
6321	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6322	/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6323	/// from c when the corresponding mask bit is not set).
6324	///
6325	/// Rounding is done according to the rounding parameter, which can be one of:
6326	///
6327	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6328	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6329	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6330	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6331	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6332	///
6333	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
6334	#[inline]
6335	#[target_feature(enable = "avx512fp16")]
6336	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6337	#[rustc_legacy_const_generics(`4`)]
6338	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6339	pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
6340	a: __m512h,
6341	b: __m512h,
6342	c: __m512h,
6343	k: __mmask32,
6344	) -> __m512h {
6345	unsafe {
6346	static_assert_rounding!(ROUNDING);
6347	simd_select_bitmask(m:k, yes:_mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), no:c)
6348	}
6349	}
6350
6351	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6352	/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6353	/// out when the corresponding mask bit is not set).
6354	///
6355	/// Rounding is done according to the rounding parameter, which can be one of:
6356	///
6357	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6358	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6359	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6360	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6361	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6362	///
6363	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
6364	#[inline]
6365	#[target_feature(enable = "avx512fp16")]
6366	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6367	#[rustc_legacy_const_generics(`4`)]
6368	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6369	pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
6370	k: __mmask32,
6371	a: __m512h,
6372	b: __m512h,
6373	c: __m512h,
6374	) -> __m512h {
6375	unsafe {
6376	static_assert_rounding!(ROUNDING);
6377	simd_select_bitmask(
6378	m:k,
6379	yes:_mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
6380	no:_mm512_setzero_ph(),
6381	)
6382	}
6383	}
6384
6385	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6386	/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6387	/// elements from a to the upper elements of dst.
6388	///
6389	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
6390	#[inline]
6391	#[target_feature(enable = "avx512fp16")]
6392	#[cfg_attr(test, assert_instr(vfnmadd))]
6393	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6394	pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6395	unsafe {
6396	let extracta: f16 = simd_extract!(a, `0`);
6397	let extractb: f16 = simd_extract!(b, `0`);
6398	let extractc: f16 = simd_extract!(c, `0`);
6399	let r: f16 = fmaf16(-extracta, b:extractb, c:extractc);
6400	simd_insert!(a, `0`, r)
6401	}
6402	}
6403
6404	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6405	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6406	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6407	/// elements of dst.
6408	///
6409	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
6410	#[inline]
6411	#[target_feature(enable = "avx512fp16")]
6412	#[cfg_attr(test, assert_instr(vfnmadd))]
6413	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6414	pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6415	unsafe {
6416	let mut fnmadd: f16 = simd_extract!(a, `0`);
6417	if k & `1` != `0` {
6418	let extractb: f16 = simd_extract!(b, `0`);
6419	let extractc: f16 = simd_extract!(c, `0`);
6420	fnmadd = fmaf16(-fnmadd, b:extractb, c:extractc);
6421	}
6422	simd_insert!(a, `0`, fnmadd)
6423	}
6424	}
6425
6426	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6427	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6428	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6429	/// elements of dst.
6430	///
6431	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
6432	#[inline]
6433	#[target_feature(enable = "avx512fp16")]
6434	#[cfg_attr(test, assert_instr(vfnmadd))]
6435	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6436	pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6437	unsafe {
6438	let mut fnmadd: f16 = simd_extract!(c, `0`);
6439	if k & `1` != `0` {
6440	let extracta: f16 = simd_extract!(a, `0`);
6441	let extractb: f16 = simd_extract!(b, `0`);
6442	fnmadd = fmaf16(-extracta, b:extractb, c:fnmadd);
6443	}
6444	simd_insert!(c, `0`, fnmadd)
6445	}
6446	}
6447
6448	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6449	/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6450	/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6451	/// elements of dst.
6452	///
6453	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
6454	#[inline]
6455	#[target_feature(enable = "avx512fp16")]
6456	#[cfg_attr(test, assert_instr(vfnmadd))]
6457	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6458	pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6459	unsafe {
6460	let mut fnmadd: f16 = `0.0`;
6461	if k & `1` != `0` {
6462	let extracta: f16 = simd_extract!(a, `0`);
6463	let extractb: f16 = simd_extract!(b, `0`);
6464	let extractc: f16 = simd_extract!(c, `0`);
6465	fnmadd = fmaf16(-extracta, b:extractb, c:extractc);
6466	}
6467	simd_insert!(a, `0`, fnmadd)
6468	}
6469	}
6470
6471	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6472	/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6473	/// elements from a to the upper elements of dst.
6474	///
6475	/// Rounding is done according to the rounding parameter, which can be one of:
6476	///
6477	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6478	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6479	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6480	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6481	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6482	///
6483	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
6484	#[inline]
6485	#[target_feature(enable = "avx512fp16")]
6486	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6487	#[rustc_legacy_const_generics(`3`)]
6488	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6489	pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6490	unsafe {
6491	static_assert_rounding!(ROUNDING);
6492	let extracta: f16 = simd_extract!(a, `0`);
6493	let extractb: f16 = simd_extract!(b, `0`);
6494	let extractc: f16 = simd_extract!(c, `0`);
6495	let r: f16 = vfmaddsh(-extracta, b:extractb, c:extractc, ROUNDING);
6496	simd_insert!(a, `0`, r)
6497	}
6498	}
6499
6500	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6501	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6502	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6503	/// elements of dst.
6504	///
6505	/// Rounding is done according to the rounding parameter, which can be one of:
6506	///
6507	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6508	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6509	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6510	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6511	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6512	///
6513	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
6514	#[inline]
6515	#[target_feature(enable = "avx512fp16")]
6516	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6517	#[rustc_legacy_const_generics(`4`)]
6518	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6519	pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
6520	a: __m128h,
6521	k: __mmask8,
6522	b: __m128h,
6523	c: __m128h,
6524	) -> __m128h {
6525	unsafe {
6526	static_assert_rounding!(ROUNDING);
6527	let mut fnmadd: f16 = simd_extract!(a, `0`);
6528	if k & `1` != `0` {
6529	let extractb: f16 = simd_extract!(b, `0`);
6530	let extractc: f16 = simd_extract!(c, `0`);
6531	fnmadd = vfmaddsh(-fnmadd, b:extractb, c:extractc, ROUNDING);
6532	}
6533	simd_insert!(a, `0`, fnmadd)
6534	}
6535	}
6536
6537	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6538	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6539	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6540	/// elements of dst.
6541	///
6542	/// Rounding is done according to the rounding parameter, which can be one of:
6543	///
6544	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6545	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6546	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6547	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6548	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6549	///
6550	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
6551	#[inline]
6552	#[target_feature(enable = "avx512fp16")]
6553	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6554	#[rustc_legacy_const_generics(`4`)]
6555	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6556	pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
6557	a: __m128h,
6558	b: __m128h,
6559	c: __m128h,
6560	k: __mmask8,
6561	) -> __m128h {
6562	unsafe {
6563	static_assert_rounding!(ROUNDING);
6564	let mut fnmadd: f16 = simd_extract!(c, `0`);
6565	if k & `1` != `0` {
6566	let extracta: f16 = simd_extract!(a, `0`);
6567	let extractb: f16 = simd_extract!(b, `0`);
6568	fnmadd = vfmaddsh(-extracta, b:extractb, c:fnmadd, ROUNDING);
6569	}
6570	simd_insert!(c, `0`, fnmadd)
6571	}
6572	}
6573
6574	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6575	/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6576	/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6577	/// elements of dst.
6578	///
6579	/// Rounding is done according to the rounding parameter, which can be one of:
6580	///
6581	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6582	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6583	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6584	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6585	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6586	///
6587	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
6588	#[inline]
6589	#[target_feature(enable = "avx512fp16")]
6590	#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = `8`))]
6591	#[rustc_legacy_const_generics(`4`)]
6592	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6593	pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
6594	k: __mmask8,
6595	a: __m128h,
6596	b: __m128h,
6597	c: __m128h,
6598	) -> __m128h {
6599	unsafe {
6600	static_assert_rounding!(ROUNDING);
6601	let mut fnmadd: f16 = `0.0`;
6602	if k & `1` != `0` {
6603	let extracta: f16 = simd_extract!(a, `0`);
6604	let extractb: f16 = simd_extract!(b, `0`);
6605	let extractc: f16 = simd_extract!(c, `0`);
6606	fnmadd = vfmaddsh(-extracta, b:extractb, c:extractc, ROUNDING);
6607	}
6608	simd_insert!(a, `0`, fnmadd)
6609	}
6610	}
6611
6612	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6613	/// in c from the negated intermediate result, and store the results in dst.
6614	///
6615	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
6616	#[inline]
6617	#[target_feature(enable = "avx512fp16,avx512vl")]
6618	#[cfg_attr(test, assert_instr(vfnmsub))]
6619	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6620	pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6621	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
6622	}
6623
6624	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6625	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6626	/// copied from a when the corresponding mask bit is not set).
6627	///
6628	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
6629	#[inline]
6630	#[target_feature(enable = "avx512fp16,avx512vl")]
6631	#[cfg_attr(test, assert_instr(vfnmsub))]
6632	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6633	pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6634	unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ph(a, b, c), no:a) }
6635	}
6636
6637	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6638	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6639	/// copied from c when the corresponding mask bit is not set).
6640	///
6641	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
6642	#[inline]
6643	#[target_feature(enable = "avx512fp16,avx512vl")]
6644	#[cfg_attr(test, assert_instr(vfnmsub))]
6645	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6646	pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6647	unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ph(a, b, c), no:c) }
6648	}
6649
6650	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6651	/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6652	/// zeroed out when the corresponding mask bit is not set).
6653	///
6654	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
6655	#[inline]
6656	#[target_feature(enable = "avx512fp16,avx512vl")]
6657	#[cfg_attr(test, assert_instr(vfnmsub))]
6658	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6659	pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6660	unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ph(a, b, c), no:_mm_setzero_ph()) }
6661	}
6662
6663	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6664	/// in c from the negated intermediate result, and store the results in dst.
6665	///
6666	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
6667	#[inline]
6668	#[target_feature(enable = "avx512fp16,avx512vl")]
6669	#[cfg_attr(test, assert_instr(vfnmsub))]
6670	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6671	pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6672	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
6673	}
6674
6675	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6676	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6677	/// copied from a when the corresponding mask bit is not set).
6678	///
6679	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
6680	#[inline]
6681	#[target_feature(enable = "avx512fp16,avx512vl")]
6682	#[cfg_attr(test, assert_instr(vfnmsub))]
6683	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6684	pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
6685	unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ph(a, b, c), no:a) }
6686	}
6687
6688	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6689	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6690	/// copied from c when the corresponding mask bit is not set).
6691	///
6692	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
6693	#[inline]
6694	#[target_feature(enable = "avx512fp16,avx512vl")]
6695	#[cfg_attr(test, assert_instr(vfnmsub))]
6696	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6697	pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
6698	unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ph(a, b, c), no:c) }
6699	}
6700
6701	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6702	/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6703	/// zeroed out when the corresponding mask bit is not set).
6704	///
6705	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
6706	#[inline]
6707	#[target_feature(enable = "avx512fp16,avx512vl")]
6708	#[cfg_attr(test, assert_instr(vfnmsub))]
6709	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6710	pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6711	unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ph(a, b, c), no:_mm256_setzero_ph()) }
6712	}
6713
6714	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6715	/// in c from the negated intermediate result, and store the results in dst.
6716	///
6717	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
6718	#[inline]
6719	#[target_feature(enable = "avx512fp16")]
6720	#[cfg_attr(test, assert_instr(vfnmsub))]
6721	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6722	pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6723	unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
6724	}
6725
6726	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6727	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6728	/// copied from a when the corresponding mask bit is not set).
6729	///
6730	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
6731	#[inline]
6732	#[target_feature(enable = "avx512fp16")]
6733	#[cfg_attr(test, assert_instr(vfnmsub))]
6734	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6735	pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
6736	unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ph(a, b, c), no:a) }
6737	}
6738
6739	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6740	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6741	/// copied from c when the corresponding mask bit is not set).
6742	///
6743	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
6744	#[inline]
6745	#[target_feature(enable = "avx512fp16")]
6746	#[cfg_attr(test, assert_instr(vfnmsub))]
6747	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6748	pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
6749	unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ph(a, b, c), no:c) }
6750	}
6751
6752	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6753	/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6754	/// zeroed out when the corresponding mask bit is not set).
6755	///
6756	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
6757	#[inline]
6758	#[target_feature(enable = "avx512fp16")]
6759	#[cfg_attr(test, assert_instr(vfnmsub))]
6760	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6761	pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6762	unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ph(a, b, c), no:_mm512_setzero_ph()) }
6763	}
6764
6765	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6766	/// in c from the negated intermediate result, and store the results in dst.
6767	///
6768	/// Rounding is done according to the rounding parameter, which can be one of:
6769	///
6770	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6771	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6772	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6773	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6774	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6775	///
6776	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
6777	#[inline]
6778	#[target_feature(enable = "avx512fp16")]
6779	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
6780	#[rustc_legacy_const_generics(`3`)]
6781	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6782	pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6783	unsafe {
6784	static_assert_rounding!(ROUNDING);
6785	vfmaddph_512(a:simd_neg(a), b, c:simd_neg(c), ROUNDING)
6786	}
6787	}
6788
6789	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6790	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6791	/// copied from a when the corresponding mask bit is not set).
6792	///
6793	/// Rounding is done according to the rounding parameter, which can be one of:
6794	///
6795	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6796	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6797	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6798	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6799	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6800	///
6801	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
6802	#[inline]
6803	#[target_feature(enable = "avx512fp16")]
6804	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
6805	#[rustc_legacy_const_generics(`4`)]
6806	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6807	pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
6808	a: __m512h,
6809	k: __mmask32,
6810	b: __m512h,
6811	c: __m512h,
6812	) -> __m512h {
6813	unsafe {
6814	static_assert_rounding!(ROUNDING);
6815	simd_select_bitmask(m:k, yes:_mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), no:a)
6816	}
6817	}
6818
6819	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6820	/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6821	/// copied from c when the corresponding mask bit is not set).
6822	///
6823	/// Rounding is done according to the rounding parameter, which can be one of:
6824	///
6825	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6826	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6827	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6828	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6829	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6830	///
6831	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
6832	#[inline]
6833	#[target_feature(enable = "avx512fp16")]
6834	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
6835	#[rustc_legacy_const_generics(`4`)]
6836	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6837	pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
6838	a: __m512h,
6839	b: __m512h,
6840	c: __m512h,
6841	k: __mmask32,
6842	) -> __m512h {
6843	unsafe {
6844	static_assert_rounding!(ROUNDING);
6845	simd_select_bitmask(m:k, yes:_mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), no:c)
6846	}
6847	}
6848
6849	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6850	/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6851	/// zeroed out when the corresponding mask bit is not set).
6852	///
6853	/// Rounding is done according to the rounding parameter, which can be one of:
6854	///
6855	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6856	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6857	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6858	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6859	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6860	///
6861	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
6862	#[inline]
6863	#[target_feature(enable = "avx512fp16")]
6864	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
6865	#[rustc_legacy_const_generics(`4`)]
6866	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6867	pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
6868	k: __mmask32,
6869	a: __m512h,
6870	b: __m512h,
6871	c: __m512h,
6872	) -> __m512h {
6873	unsafe {
6874	static_assert_rounding!(ROUNDING);
6875	simd_select_bitmask(
6876	m:k,
6877	yes:_mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
6878	no:_mm512_setzero_ph(),
6879	)
6880	}
6881	}
6882
6883	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6884	/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6885	/// elements from a to the upper elements of dst.
6886	///
6887	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
6888	#[inline]
6889	#[target_feature(enable = "avx512fp16")]
6890	#[cfg_attr(test, assert_instr(vfnmsub))]
6891	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6892	pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6893	unsafe {
6894	let extracta: f16 = simd_extract!(a, `0`);
6895	let extractb: f16 = simd_extract!(b, `0`);
6896	let extractc: f16 = simd_extract!(c, `0`);
6897	let r: f16 = fmaf16(-extracta, b:extractb, -extractc);
6898	simd_insert!(a, `0`, r)
6899	}
6900	}
6901
6902	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6903	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6904	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6905	/// elements of dst.
6906	///
6907	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
6908	#[inline]
6909	#[target_feature(enable = "avx512fp16")]
6910	#[cfg_attr(test, assert_instr(vfnmsub))]
6911	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6912	pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6913	unsafe {
6914	let mut fnmsub: f16 = simd_extract!(a, `0`);
6915	if k & `1` != `0` {
6916	let extractb: f16 = simd_extract!(b, `0`);
6917	let extractc: f16 = simd_extract!(c, `0`);
6918	fnmsub = fmaf16(-fnmsub, b:extractb, -extractc);
6919	}
6920	simd_insert!(a, `0`, fnmsub)
6921	}
6922	}
6923
6924	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6925	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6926	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6927	/// elements of dst.
6928	///
6929	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
6930	#[inline]
6931	#[target_feature(enable = "avx512fp16")]
6932	#[cfg_attr(test, assert_instr(vfnmsub))]
6933	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6934	pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6935	unsafe {
6936	let mut fnmsub: f16 = simd_extract!(c, `0`);
6937	if k & `1` != `0` {
6938	let extracta: f16 = simd_extract!(a, `0`);
6939	let extractb: f16 = simd_extract!(b, `0`);
6940	fnmsub = fmaf16(-extracta, b:extractb, -fnmsub);
6941	}
6942	simd_insert!(c, `0`, fnmsub)
6943	}
6944	}
6945
6946	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6947	/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6948	/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6949	/// elements of dst.
6950	///
6951	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
6952	#[inline]
6953	#[target_feature(enable = "avx512fp16")]
6954	#[cfg_attr(test, assert_instr(vfnmsub))]
6955	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6956	pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6957	unsafe {
6958	let mut fnmsub: f16 = `0.0`;
6959	if k & `1` != `0` {
6960	let extracta: f16 = simd_extract!(a, `0`);
6961	let extractb: f16 = simd_extract!(b, `0`);
6962	let extractc: f16 = simd_extract!(c, `0`);
6963	fnmsub = fmaf16(-extracta, b:extractb, -extractc);
6964	}
6965	simd_insert!(a, `0`, fnmsub)
6966	}
6967	}
6968
6969	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6970	/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6971	/// elements from a to the upper elements of dst.
6972	///
6973	/// Rounding is done according to the rounding parameter, which can be one of:
6974	///
6975	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6976	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6977	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6978	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6979	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6980	///
6981	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
6982	#[inline]
6983	#[target_feature(enable = "avx512fp16")]
6984	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
6985	#[rustc_legacy_const_generics(`3`)]
6986	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6987	pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6988	unsafe {
6989	static_assert_rounding!(ROUNDING);
6990	let extracta: f16 = simd_extract!(a, `0`);
6991	let extractb: f16 = simd_extract!(b, `0`);
6992	let extractc: f16 = simd_extract!(c, `0`);
6993	let r: f16 = vfmaddsh(-extracta, b:extractb, -extractc, ROUNDING);
6994	simd_insert!(a, `0`, r)
6995	}
6996	}
6997
6998	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6999	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
7000	/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7001	/// elements of dst.
7002	///
7003	/// Rounding is done according to the rounding parameter, which can be one of:
7004	///
7005	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7006	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7007	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7008	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7009	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7010	///
7011	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
7012	#[inline]
7013	#[target_feature(enable = "avx512fp16")]
7014	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
7015	#[rustc_legacy_const_generics(`4`)]
7016	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7017	pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
7018	a: __m128h,
7019	k: __mmask8,
7020	b: __m128h,
7021	c: __m128h,
7022	) -> __m128h {
7023	unsafe {
7024	static_assert_rounding!(ROUNDING);
7025	let mut fnmsub: f16 = simd_extract!(a, `0`);
7026	if k & `1` != `0` {
7027	let extractb: f16 = simd_extract!(b, `0`);
7028	let extractc: f16 = simd_extract!(c, `0`);
7029	fnmsub = vfmaddsh(-fnmsub, b:extractb, -extractc, ROUNDING);
7030	}
7031	simd_insert!(a, `0`, fnmsub)
7032	}
7033	}
7034
7035	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7036	/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
7037	/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
7038	/// elements of dst.
7039	///
7040	/// Rounding is done according to the rounding parameter, which can be one of:
7041	///
7042	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7043	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7044	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7045	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7046	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7047	///
7048	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
7049	#[inline]
7050	#[target_feature(enable = "avx512fp16")]
7051	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
7052	#[rustc_legacy_const_generics(`4`)]
7053	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7054	pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
7055	a: __m128h,
7056	b: __m128h,
7057	c: __m128h,
7058	k: __mmask8,
7059	) -> __m128h {
7060	unsafe {
7061	static_assert_rounding!(ROUNDING);
7062	let mut fnmsub: f16 = simd_extract!(c, `0`);
7063	if k & `1` != `0` {
7064	let extracta: f16 = simd_extract!(a, `0`);
7065	let extractb: f16 = simd_extract!(b, `0`);
7066	fnmsub = vfmaddsh(-extracta, b:extractb, -fnmsub, ROUNDING);
7067	}
7068	simd_insert!(c, `0`, fnmsub)
7069	}
7070	}
7071
7072	/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7073	/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
7074	/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7075	/// elements of dst.
7076	///
7077	/// Rounding is done according to the rounding parameter, which can be one of:
7078	///
7079	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7080	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7081	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7082	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7083	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7084	///
7085	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
7086	#[inline]
7087	#[target_feature(enable = "avx512fp16")]
7088	#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = `8`))]
7089	#[rustc_legacy_const_generics(`4`)]
7090	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7091	pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
7092	k: __mmask8,
7093	a: __m128h,
7094	b: __m128h,
7095	c: __m128h,
7096	) -> __m128h {
7097	unsafe {
7098	static_assert_rounding!(ROUNDING);
7099	let mut fnmsub: f16 = `0.0`;
7100	if k & `1` != `0` {
7101	let extracta: f16 = simd_extract!(a, `0`);
7102	let extractb: f16 = simd_extract!(b, `0`);
7103	let extractc: f16 = simd_extract!(c, `0`);
7104	fnmsub = vfmaddsh(-extracta, b:extractb, -extractc, ROUNDING);
7105	}
7106	simd_insert!(a, `0`, fnmsub)
7107	}
7108	}
7109
7110	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7111	/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7112	///
7113	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
7114	#[inline]
7115	#[target_feature(enable = "avx512fp16,avx512vl")]
7116	#[cfg_attr(test, assert_instr(vfmaddsub))]
7117	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7118	pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7119	unsafe { vfmaddsubph_128(a, b, c) }
7120	}
7121
7122	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7123	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7124	/// (the element is copied from a when the corresponding mask bit is not set).
7125	///
7126	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
7127	#[inline]
7128	#[target_feature(enable = "avx512fp16,avx512vl")]
7129	#[cfg_attr(test, assert_instr(vfmaddsub))]
7130	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7131	pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
7132	unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ph(a, b, c), no:a) }
7133	}
7134
7135	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7136	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7137	/// (the element is copied from c when the corresponding mask bit is not set).
7138	///
7139	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
7140	#[inline]
7141	#[target_feature(enable = "avx512fp16,avx512vl")]
7142	#[cfg_attr(test, assert_instr(vfmaddsub))]
7143	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7144	pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
7145	unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ph(a, b, c), no:c) }
7146	}
7147
7148	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7149	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7150	/// (the element is zeroed out when the corresponding mask bit is not set).
7151	///
7152	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
7153	#[inline]
7154	#[target_feature(enable = "avx512fp16,avx512vl")]
7155	#[cfg_attr(test, assert_instr(vfmaddsub))]
7156	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7157	pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7158	unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ph(a, b, c), no:_mm_setzero_ph()) }
7159	}
7160
7161	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7162	/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7163	///
7164	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
7165	#[inline]
7166	#[target_feature(enable = "avx512fp16,avx512vl")]
7167	#[cfg_attr(test, assert_instr(vfmaddsub))]
7168	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7169	pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7170	unsafe { vfmaddsubph_256(a, b, c) }
7171	}
7172
7173	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7174	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7175	/// (the element is copied from a when the corresponding mask bit is not set).
7176	///
7177	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
7178	#[inline]
7179	#[target_feature(enable = "avx512fp16,avx512vl")]
7180	#[cfg_attr(test, assert_instr(vfmaddsub))]
7181	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7182	pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
7183	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ph(a, b, c), no:a) }
7184	}
7185
7186	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7187	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7188	/// (the element is copied from c when the corresponding mask bit is not set).
7189	///
7190	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
7191	#[inline]
7192	#[target_feature(enable = "avx512fp16,avx512vl")]
7193	#[cfg_attr(test, assert_instr(vfmaddsub))]
7194	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7195	pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
7196	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ph(a, b, c), no:c) }
7197	}
7198
7199	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7200	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7201	/// (the element is zeroed out when the corresponding mask bit is not set).
7202	///
7203	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
7204	#[inline]
7205	#[target_feature(enable = "avx512fp16,avx512vl")]
7206	#[cfg_attr(test, assert_instr(vfmaddsub))]
7207	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7208	pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7209	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ph(a, b, c), no:_mm256_setzero_ph()) }
7210	}
7211
7212	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7213	/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7214	///
7215	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
7216	#[inline]
7217	#[target_feature(enable = "avx512fp16")]
7218	#[cfg_attr(test, assert_instr(vfmaddsub))]
7219	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7220	pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7221	_mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
7222	}
7223
7224	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7225	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7226	/// (the element is copied from a when the corresponding mask bit is not set).
7227	///
7228	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
7229	#[inline]
7230	#[target_feature(enable = "avx512fp16")]
7231	#[cfg_attr(test, assert_instr(vfmaddsub))]
7232	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7233	pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
7234	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ph(a, b, c), no:a) }
7235	}
7236
7237	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7238	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7239	/// (the element is copied from c when the corresponding mask bit is not set).
7240	///
7241	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
7242	#[inline]
7243	#[target_feature(enable = "avx512fp16")]
7244	#[cfg_attr(test, assert_instr(vfmaddsub))]
7245	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7246	pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
7247	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ph(a, b, c), no:c) }
7248	}
7249
7250	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7251	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7252	/// (the element is zeroed out when the corresponding mask bit is not set).
7253	///
7254	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
7255	#[inline]
7256	#[target_feature(enable = "avx512fp16")]
7257	#[cfg_attr(test, assert_instr(vfmaddsub))]
7258	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7259	pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7260	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ph(a, b, c), no:_mm512_setzero_ph()) }
7261	}
7262
7263	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7264	/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7265	///
7266	/// Rounding is done according to the rounding parameter, which can be one of:
7267	///
7268	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7269	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7270	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7271	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7272	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7273	///
7274	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
7275	#[inline]
7276	#[target_feature(enable = "avx512fp16")]
7277	#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = `8`))]
7278	#[rustc_legacy_const_generics(`3`)]
7279	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7280	pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
7281	a: __m512h,
7282	b: __m512h,
7283	c: __m512h,
7284	) -> __m512h {
7285	unsafe {
7286	static_assert_rounding!(ROUNDING);
7287	vfmaddsubph_512(a, b, c, ROUNDING)
7288	}
7289	}
7290
7291	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7292	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7293	/// (the element is copied from a when the corresponding mask bit is not set).
7294	///
7295	/// Rounding is done according to the rounding parameter, which can be one of:
7296	///
7297	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7298	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7299	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7300	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7301	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7302	///
7303	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
7304	#[inline]
7305	#[target_feature(enable = "avx512fp16")]
7306	#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = `8`))]
7307	#[rustc_legacy_const_generics(`4`)]
7308	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7309	pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
7310	a: __m512h,
7311	k: __mmask32,
7312	b: __m512h,
7313	c: __m512h,
7314	) -> __m512h {
7315	unsafe {
7316	static_assert_rounding!(ROUNDING);
7317	simd_select_bitmask(m:k, yes:_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), no:a)
7318	}
7319	}
7320
7321	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7322	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7323	/// (the element is copied from c when the corresponding mask bit is not set).
7324	///
7325	/// Rounding is done according to the rounding parameter, which can be one of:
7326	///
7327	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7328	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7329	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7330	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7331	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7332	///
7333	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
7334	#[inline]
7335	#[target_feature(enable = "avx512fp16")]
7336	#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = `8`))]
7337	#[rustc_legacy_const_generics(`4`)]
7338	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7339	pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
7340	a: __m512h,
7341	b: __m512h,
7342	c: __m512h,
7343	k: __mmask32,
7344	) -> __m512h {
7345	unsafe {
7346	static_assert_rounding!(ROUNDING);
7347	simd_select_bitmask(m:k, yes:_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), no:c)
7348	}
7349	}
7350
7351	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7352	/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7353	/// (the element is zeroed out when the corresponding mask bit is not set).
7354	///
7355	/// Rounding is done according to the rounding parameter, which can be one of:
7356	///
7357	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7358	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7359	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7360	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7361	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7362	///
7363	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
7364	#[inline]
7365	#[target_feature(enable = "avx512fp16")]
7366	#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = `8`))]
7367	#[rustc_legacy_const_generics(`4`)]
7368	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7369	pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
7370	k: __mmask32,
7371	a: __m512h,
7372	b: __m512h,
7373	c: __m512h,
7374	) -> __m512h {
7375	unsafe {
7376	static_assert_rounding!(ROUNDING);
7377	simd_select_bitmask(
7378	m:k,
7379	yes:_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
7380	no:_mm512_setzero_ph(),
7381	)
7382	}
7383	}
7384
7385	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7386	/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7387	///
7388	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
7389	#[inline]
7390	#[target_feature(enable = "avx512fp16,avx512vl")]
7391	#[cfg_attr(test, assert_instr(vfmsubadd))]
7392	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7393	pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7394	unsafe { vfmaddsubph_128(a, b, c:simd_neg(c)) }
7395	}
7396
7397	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7398	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7399	/// (the element is copied from a when the corresponding mask bit is not set).
7400	///
7401	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
7402	#[inline]
7403	#[target_feature(enable = "avx512fp16,avx512vl")]
7404	#[cfg_attr(test, assert_instr(vfmsubadd))]
7405	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7406	pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
7407	unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ph(a, b, c), no:a) }
7408	}
7409
7410	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7411	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7412	/// (the element is copied from c when the corresponding mask bit is not set).
7413	///
7414	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
7415	#[inline]
7416	#[target_feature(enable = "avx512fp16,avx512vl")]
7417	#[cfg_attr(test, assert_instr(vfmsubadd))]
7418	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7419	pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
7420	unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ph(a, b, c), no:c) }
7421	}
7422
7423	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7424	/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7425	/// (the element is zeroed out when the corresponding mask bit is not set).
7426	///
7427	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
7428	#[inline]
7429	#[target_feature(enable = "avx512fp16,avx512vl")]
7430	#[cfg_attr(test, assert_instr(vfmsubadd))]
7431	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7432	pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7433	unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ph(a, b, c), no:_mm_setzero_ph()) }
7434	}
7435
7436	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7437	/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7438	///
7439	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
7440	#[inline]
7441	#[target_feature(enable = "avx512fp16,avx512vl")]
7442	#[cfg_attr(test, assert_instr(vfmsubadd))]
7443	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7444	pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7445	unsafe { vfmaddsubph_256(a, b, c:simd_neg(c)) }
7446	}
7447
7448	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7449	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7450	/// (the element is copied from a when the corresponding mask bit is not set).
7451	///
7452	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
7453	#[inline]
7454	#[target_feature(enable = "avx512fp16,avx512vl")]
7455	#[cfg_attr(test, assert_instr(vfmsubadd))]
7456	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7457	pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
7458	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ph(a, b, c), no:a) }
7459	}
7460
7461	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7462	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7463	/// (the element is copied from c when the corresponding mask bit is not set).
7464	///
7465	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
7466	#[inline]
7467	#[target_feature(enable = "avx512fp16,avx512vl")]
7468	#[cfg_attr(test, assert_instr(vfmsubadd))]
7469	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7470	pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
7471	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ph(a, b, c), no:c) }
7472	}
7473
7474	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7475	/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7476	/// (the element is zeroed out when the corresponding mask bit is not set).
7477	///
7478	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
7479	#[inline]
7480	#[target_feature(enable = "avx512fp16,avx512vl")]
7481	#[cfg_attr(test, assert_instr(vfmsubadd))]
7482	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7483	pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7484	unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ph(a, b, c), no:_mm256_setzero_ph()) }
7485	}
7486
7487	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7488	/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7489	///
7490	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
7491	#[inline]
7492	#[target_feature(enable = "avx512fp16")]
7493	#[cfg_attr(test, assert_instr(vfmsubadd))]
7494	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7495	pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7496	_mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
7497	}
7498
7499	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7500	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7501	/// (the element is copied from a when the corresponding mask bit is not set).
7502	///
7503	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
7504	#[inline]
7505	#[target_feature(enable = "avx512fp16")]
7506	#[cfg_attr(test, assert_instr(vfmsubadd))]
7507	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7508	pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
7509	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ph(a, b, c), no:a) }
7510	}
7511
7512	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7513	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7514	/// (the element is copied from c when the corresponding mask bit is not set).
7515	///
7516	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
7517	#[inline]
7518	#[target_feature(enable = "avx512fp16")]
7519	#[cfg_attr(test, assert_instr(vfmsubadd))]
7520	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7521	pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
7522	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ph(a, b, c), no:c) }
7523	}
7524
7525	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7526	/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7527	/// (the element is zeroed out when the corresponding mask bit is not set).
7528	///
7529	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
7530	#[inline]
7531	#[target_feature(enable = "avx512fp16")]
7532	#[cfg_attr(test, assert_instr(vfmsubadd))]
7533	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7534	pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7535	unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ph(a, b, c), no:_mm512_setzero_ph()) }
7536	}
7537
7538	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7539	/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7540	///
7541	/// Rounding is done according to the rounding parameter, which can be one of:
7542	///
7543	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7544	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7545	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7546	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7547	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7548	///
7549	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
7550	#[inline]
7551	#[target_feature(enable = "avx512fp16")]
7552	#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = `8`))]
7553	#[rustc_legacy_const_generics(`3`)]
7554	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7555	pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
7556	a: __m512h,
7557	b: __m512h,
7558	c: __m512h,
7559	) -> __m512h {
7560	unsafe {
7561	static_assert_rounding!(ROUNDING);
7562	vfmaddsubph_512(a, b, c:simd_neg(c), ROUNDING)
7563	}
7564	}
7565
7566	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7567	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7568	/// (the element is copied from a when the corresponding mask bit is not set).
7569	///
7570	/// Rounding is done according to the rounding parameter, which can be one of:
7571	///
7572	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7573	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7574	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7575	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7576	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7577	///
7578	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
7579	#[inline]
7580	#[target_feature(enable = "avx512fp16")]
7581	#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = `8`))]
7582	#[rustc_legacy_const_generics(`4`)]
7583	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7584	pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
7585	a: __m512h,
7586	k: __mmask32,
7587	b: __m512h,
7588	c: __m512h,
7589	) -> __m512h {
7590	unsafe {
7591	static_assert_rounding!(ROUNDING);
7592	simd_select_bitmask(m:k, yes:_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), no:a)
7593	}
7594	}
7595
7596	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7597	/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7598	/// (the element is copied from c when the corresponding mask bit is not set).
7599	///
7600	/// Rounding is done according to the rounding parameter, which can be one of:
7601	///
7602	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7603	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7604	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7605	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7606	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7607	///
7608	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
7609	#[inline]
7610	#[target_feature(enable = "avx512fp16")]
7611	#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = `8`))]
7612	#[rustc_legacy_const_generics(`4`)]
7613	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7614	pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
7615	a: __m512h,
7616	b: __m512h,
7617	c: __m512h,
7618	k: __mmask32,
7619	) -> __m512h {
7620	unsafe {
7621	static_assert_rounding!(ROUNDING);
7622	simd_select_bitmask(m:k, yes:_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), no:c)
7623	}
7624	}
7625
7626	/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7627	/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7628	/// (the element is zeroed out when the corresponding mask bit is not set).
7629	///
7630	/// Rounding is done according to the rounding parameter, which can be one of:
7631	///
7632	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7633	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7634	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7635	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7636	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7637	///
7638	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
7639	#[inline]
7640	#[target_feature(enable = "avx512fp16")]
7641	#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = `8`))]
7642	#[rustc_legacy_const_generics(`4`)]
7643	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7644	pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
7645	k: __mmask32,
7646	a: __m512h,
7647	b: __m512h,
7648	c: __m512h,
7649	) -> __m512h {
7650	unsafe {
7651	static_assert_rounding!(ROUNDING);
7652	simd_select_bitmask(
7653	m:k,
7654	yes:_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
7655	no:_mm512_setzero_ph(),
7656	)
7657	}
7658	}
7659
7660	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
7661	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7662	///
7663	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
7664	#[inline]
7665	#[target_feature(enable = "avx512fp16,avx512vl")]
7666	#[cfg_attr(test, assert_instr(vrcpph))]
7667	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7668	pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
7669	_mm_mask_rcp_ph(src:_mm_undefined_ph(), k:`0xff`, a)
7670	}
7671
7672	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7673	/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
7674	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7675	///
7676	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
7677	#[inline]
7678	#[target_feature(enable = "avx512fp16,avx512vl")]
7679	#[cfg_attr(test, assert_instr(vrcpph))]
7680	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7681	pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7682	unsafe { vrcpph_128(a, src, k) }
7683	}
7684
7685	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7686	/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7687	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7688	///
7689	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
7690	#[inline]
7691	#[target_feature(enable = "avx512fp16,avx512vl")]
7692	#[cfg_attr(test, assert_instr(vrcpph))]
7693	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7694	pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
7695	_mm_mask_rcp_ph(src:_mm_setzero_ph(), k, a)
7696	}
7697
7698	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
7699	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7700	///
7701	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
7702	#[inline]
7703	#[target_feature(enable = "avx512fp16,avx512vl")]
7704	#[cfg_attr(test, assert_instr(vrcpph))]
7705	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7706	pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
7707	_mm256_mask_rcp_ph(src:_mm256_undefined_ph(), k:`0xffff`, a)
7708	}
7709
7710	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7711	/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
7712	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7713	///
7714	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
7715	#[inline]
7716	#[target_feature(enable = "avx512fp16,avx512vl")]
7717	#[cfg_attr(test, assert_instr(vrcpph))]
7718	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7719	pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
7720	unsafe { vrcpph_256(a, src, k) }
7721	}
7722
7723	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7724	/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7725	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7726	///
7727	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
7728	#[inline]
7729	#[target_feature(enable = "avx512fp16,avx512vl")]
7730	#[cfg_attr(test, assert_instr(vrcpph))]
7731	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7732	pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
7733	_mm256_mask_rcp_ph(src:_mm256_setzero_ph(), k, a)
7734	}
7735
7736	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
7737	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7738	///
7739	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
7740	#[inline]
7741	#[target_feature(enable = "avx512fp16")]
7742	#[cfg_attr(test, assert_instr(vrcpph))]
7743	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7744	pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
7745	_mm512_mask_rcp_ph(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
7746	}
7747
7748	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7749	/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
7750	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7751	///
7752	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
7753	#[inline]
7754	#[target_feature(enable = "avx512fp16")]
7755	#[cfg_attr(test, assert_instr(vrcpph))]
7756	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7757	pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
7758	unsafe { vrcpph_512(a, src, k) }
7759	}
7760
7761	/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7762	/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7763	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7764	///
7765	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
7766	#[inline]
7767	#[target_feature(enable = "avx512fp16")]
7768	#[cfg_attr(test, assert_instr(vrcpph))]
7769	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7770	pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
7771	_mm512_mask_rcp_ph(src:_mm512_setzero_ph(), k, a)
7772	}
7773
7774	/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7775	/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
7776	/// upper elements of dst.
7777	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7778	///
7779	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
7780	#[inline]
7781	#[target_feature(enable = "avx512fp16")]
7782	#[cfg_attr(test, assert_instr(vrcpsh))]
7783	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7784	pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
7785	_mm_mask_rcp_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
7786	}
7787
7788	/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7789	/// store the result in the lower element of dst using writemask k (the element is copied from src when
7790	/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7791	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7792	///
7793	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
7794	#[inline]
7795	#[target_feature(enable = "avx512fp16")]
7796	#[cfg_attr(test, assert_instr(vrcpsh))]
7797	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7798	pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7799	unsafe { vrcpsh(a, b, src, k) }
7800	}
7801
7802	/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7803	/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
7804	/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7805	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7806	///
7807	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
7808	#[inline]
7809	#[target_feature(enable = "avx512fp16")]
7810	#[cfg_attr(test, assert_instr(vrcpsh))]
7811	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7812	pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7813	_mm_mask_rcp_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
7814	}
7815
7816	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7817	/// elements in a, and store the results in dst.
7818	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7819	///
7820	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
7821	#[inline]
7822	#[target_feature(enable = "avx512fp16,avx512vl")]
7823	#[cfg_attr(test, assert_instr(vrsqrtph))]
7824	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7825	pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
7826	_mm_mask_rsqrt_ph(src:_mm_undefined_ph(), k:`0xff`, a)
7827	}
7828
7829	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7830	/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7831	/// the corresponding mask bit is not set).
7832	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7833	///
7834	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
7835	#[inline]
7836	#[target_feature(enable = "avx512fp16,avx512vl")]
7837	#[cfg_attr(test, assert_instr(vrsqrtph))]
7838	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7839	pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7840	unsafe { vrsqrtph_128(a, src, k) }
7841	}
7842
7843	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7844	/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7845	/// corresponding mask bit is not set).
7846	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7847	///
7848	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
7849	#[inline]
7850	#[target_feature(enable = "avx512fp16,avx512vl")]
7851	#[cfg_attr(test, assert_instr(vrsqrtph))]
7852	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7853	pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
7854	_mm_mask_rsqrt_ph(src:_mm_setzero_ph(), k, a)
7855	}
7856
7857	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7858	/// elements in a, and store the results in dst.
7859	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7860	///
7861	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
7862	#[inline]
7863	#[target_feature(enable = "avx512fp16,avx512vl")]
7864	#[cfg_attr(test, assert_instr(vrsqrtph))]
7865	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7866	pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
7867	_mm256_mask_rsqrt_ph(src:_mm256_undefined_ph(), k:`0xffff`, a)
7868	}
7869
7870	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7871	/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7872	/// the corresponding mask bit is not set).
7873	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7874	///
7875	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
7876	#[inline]
7877	#[target_feature(enable = "avx512fp16,avx512vl")]
7878	#[cfg_attr(test, assert_instr(vrsqrtph))]
7879	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7880	pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
7881	unsafe { vrsqrtph_256(a, src, k) }
7882	}
7883
7884	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7885	/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7886	/// corresponding mask bit is not set).
7887	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7888	///
7889	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
7890	#[inline]
7891	#[target_feature(enable = "avx512fp16,avx512vl")]
7892	#[cfg_attr(test, assert_instr(vrsqrtph))]
7893	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7894	pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
7895	_mm256_mask_rsqrt_ph(src:_mm256_setzero_ph(), k, a)
7896	}
7897
7898	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7899	/// elements in a, and store the results in dst.
7900	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7901	///
7902	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
7903	#[inline]
7904	#[target_feature(enable = "avx512fp16")]
7905	#[cfg_attr(test, assert_instr(vrsqrtph))]
7906	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7907	pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
7908	_mm512_mask_rsqrt_ph(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
7909	}
7910
7911	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7912	/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7913	/// the corresponding mask bit is not set).
7914	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7915	///
7916	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
7917	#[inline]
7918	#[target_feature(enable = "avx512fp16")]
7919	#[cfg_attr(test, assert_instr(vrsqrtph))]
7920	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7921	pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
7922	unsafe { vrsqrtph_512(a, src, k) }
7923	}
7924
7925	/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7926	/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7927	/// corresponding mask bit is not set).
7928	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7929	///
7930	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
7931	#[inline]
7932	#[target_feature(enable = "avx512fp16")]
7933	#[cfg_attr(test, assert_instr(vrsqrtph))]
7934	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7935	pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
7936	_mm512_mask_rsqrt_ph(src:_mm512_setzero_ph(), k, a)
7937	}
7938
7939	/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7940	/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a
7941	/// to the upper elements of dst.
7942	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7943	///
7944	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
7945	#[inline]
7946	#[target_feature(enable = "avx512fp16")]
7947	#[cfg_attr(test, assert_instr(vrsqrtsh))]
7948	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7949	pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
7950	_mm_mask_rsqrt_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
7951	}
7952
7953	/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7954	/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src
7955	/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7956	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7957	///
7958	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
7959	#[inline]
7960	#[target_feature(enable = "avx512fp16")]
7961	#[cfg_attr(test, assert_instr(vrsqrtsh))]
7962	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7963	pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7964	unsafe { vrsqrtsh(a, b, src, k) }
7965	}
7966
7967	/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7968	/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when
7969	/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7970	/// The maximum relative error for this approximation is less than `1.52^-12`.*
7971	///
7972	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
7973	#[inline]
7974	#[target_feature(enable = "avx512fp16")]
7975	#[cfg_attr(test, assert_instr(vrsqrtsh))]
7976	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7977	pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7978	_mm_mask_rsqrt_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
7979	}
7980
7981	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7982	/// results in dst.
7983	///
7984	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
7985	#[inline]
7986	#[target_feature(enable = "avx512fp16,avx512vl")]
7987	#[cfg_attr(test, assert_instr(vsqrtph))]
7988	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7989	pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
7990	unsafe { simd_fsqrt(a) }
7991	}
7992
7993	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7994	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7995	///
7996	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
7997	#[inline]
7998	#[target_feature(enable = "avx512fp16,avx512vl")]
7999	#[cfg_attr(test, assert_instr(vsqrtph))]
8000	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8001	pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
8002	unsafe { simd_select_bitmask(m:k, yes:_mm_sqrt_ph(a), no:src) }
8003	}
8004
8005	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8006	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8007	///
8008	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
8009	#[inline]
8010	#[target_feature(enable = "avx512fp16,avx512vl")]
8011	#[cfg_attr(test, assert_instr(vsqrtph))]
8012	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8013	pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
8014	unsafe { simd_select_bitmask(m:k, yes:_mm_sqrt_ph(a), no:_mm_setzero_ph()) }
8015	}
8016
8017	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8018	/// results in dst.
8019	///
8020	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
8021	#[inline]
8022	#[target_feature(enable = "avx512fp16,avx512vl")]
8023	#[cfg_attr(test, assert_instr(vsqrtph))]
8024	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8025	pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
8026	unsafe { simd_fsqrt(a) }
8027	}
8028
8029	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8030	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8031	///
8032	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
8033	#[inline]
8034	#[target_feature(enable = "avx512fp16,avx512vl")]
8035	#[cfg_attr(test, assert_instr(vsqrtph))]
8036	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8037	pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8038	unsafe { simd_select_bitmask(m:k, yes:_mm256_sqrt_ph(a), no:src) }
8039	}
8040
8041	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8042	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8043	///
8044	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
8045	#[inline]
8046	#[target_feature(enable = "avx512fp16,avx512vl")]
8047	#[cfg_attr(test, assert_instr(vsqrtph))]
8048	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8049	pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
8050	unsafe { simd_select_bitmask(m:k, yes:_mm256_sqrt_ph(a), no:_mm256_setzero_ph()) }
8051	}
8052
8053	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8054	/// results in dst.
8055	///
8056	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
8057	#[inline]
8058	#[target_feature(enable = "avx512fp16")]
8059	#[cfg_attr(test, assert_instr(vsqrtph))]
8060	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8061	pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
8062	unsafe { simd_fsqrt(a) }
8063	}
8064
8065	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8066	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8067	///
8068	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
8069	#[inline]
8070	#[target_feature(enable = "avx512fp16")]
8071	#[cfg_attr(test, assert_instr(vsqrtph))]
8072	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8073	pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8074	unsafe { simd_select_bitmask(m:k, yes:_mm512_sqrt_ph(a), no:src) }
8075	}
8076
8077	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8078	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8079	///
8080	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
8081	#[inline]
8082	#[target_feature(enable = "avx512fp16")]
8083	#[cfg_attr(test, assert_instr(vsqrtph))]
8084	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8085	pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
8086	unsafe { simd_select_bitmask(m:k, yes:_mm512_sqrt_ph(a), no:_mm512_setzero_ph()) }
8087	}
8088
8089	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8090	/// results in dst.
8091	/// Rounding is done according to the rounding parameter, which can be one of:
8092	///
8093	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8094	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8095	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8096	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8097	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8098	///
8099	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
8100	#[inline]
8101	#[target_feature(enable = "avx512fp16")]
8102	#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = `8`))]
8103	#[rustc_legacy_const_generics(`1`)]
8104	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8105	pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
8106	unsafe {
8107	static_assert_rounding!(ROUNDING);
8108	vsqrtph_512(a, ROUNDING)
8109	}
8110	}
8111
8112	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8113	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8114	/// Rounding is done according to the rounding parameter, which can be one of:
8115	///
8116	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8117	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8118	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8119	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8120	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8121	///
8122	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
8123	#[inline]
8124	#[target_feature(enable = "avx512fp16")]
8125	#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = `8`))]
8126	#[rustc_legacy_const_generics(`3`)]
8127	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8128	pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
8129	src: __m512h,
8130	k: __mmask32,
8131	a: __m512h,
8132	) -> __m512h {
8133	unsafe {
8134	static_assert_rounding!(ROUNDING);
8135	simd_select_bitmask(m:k, yes:_mm512_sqrt_round_ph::<ROUNDING>(a), no:src)
8136	}
8137	}
8138
8139	/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8140	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8141	/// Rounding is done according to the rounding parameter, which can be one of:
8142	///
8143	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8144	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8145	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8146	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8147	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8148	///
8149	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
8150	#[inline]
8151	#[target_feature(enable = "avx512fp16")]
8152	#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = `8`))]
8153	#[rustc_legacy_const_generics(`2`)]
8154	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8155	pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
8156	unsafe {
8157	static_assert_rounding!(ROUNDING);
8158	simd_select_bitmask(m:k, yes:_mm512_sqrt_round_ph::<ROUNDING>(a), no:_mm512_setzero_ph())
8159	}
8160	}
8161
8162	/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8163	/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8164	/// elements of dst.
8165	///
8166	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
8167	#[inline]
8168	#[target_feature(enable = "avx512fp16")]
8169	#[cfg_attr(test, assert_instr(vsqrtsh))]
8170	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8171	pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
8172	_mm_mask_sqrt_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
8173	}
8174
8175	/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8176	/// the result in the lower element of dst using writemask k (the element is copied from src when mask
8177	/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8178	///
8179	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
8180	#[inline]
8181	#[target_feature(enable = "avx512fp16")]
8182	#[cfg_attr(test, assert_instr(vsqrtsh))]
8183	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8184	pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8185	_mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8186	}
8187
8188	/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8189	/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8190	/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8191	///
8192	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
8193	#[inline]
8194	#[target_feature(enable = "avx512fp16")]
8195	#[cfg_attr(test, assert_instr(vsqrtsh))]
8196	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8197	pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8198	_mm_mask_sqrt_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8199	}
8200
8201	/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8202	/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8203	/// elements of dst.
8204	/// Rounding is done according to the rounding parameter, which can be one of:
8205	///
8206	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211	///
8212	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
8213	#[inline]
8214	#[target_feature(enable = "avx512fp16")]
8215	#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = `8`))]
8216	#[rustc_legacy_const_generics(`2`)]
8217	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8218	pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
8219	static_assert_rounding!(ROUNDING);
8220	_mm_mask_sqrt_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
8221	}
8222
8223	/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8224	/// the result in the lower element of dst using writemask k (the element is copied from src when mask
8225	/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8226	/// Rounding is done according to the rounding parameter, which can be one of:
8227	///
8228	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8229	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8230	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8231	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8232	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8233	///
8234	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
8235	#[inline]
8236	#[target_feature(enable = "avx512fp16")]
8237	#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = `8`))]
8238	#[rustc_legacy_const_generics(`4`)]
8239	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8240	pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
8241	src: __m128h,
8242	k: __mmask8,
8243	a: __m128h,
8244	b: __m128h,
8245	) -> __m128h {
8246	unsafe {
8247	static_assert_rounding!(ROUNDING);
8248	vsqrtsh(a, b, src, k, ROUNDING)
8249	}
8250	}
8251
8252	/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8253	/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8254	/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8255	/// Rounding is done according to the rounding parameter, which can be one of:
8256	///
8257	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8258	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8259	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8260	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8261	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8262	///
8263	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
8264	#[inline]
8265	#[target_feature(enable = "avx512fp16")]
8266	#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = `8`))]
8267	#[rustc_legacy_const_generics(`3`)]
8268	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8269	pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
8270	k: __mmask8,
8271	a: __m128h,
8272	b: __m128h,
8273	) -> __m128h {
8274	static_assert_rounding!(ROUNDING);
8275	_mm_mask_sqrt_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
8276	}
8277
8278	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8279	/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8280	/// value when inputs are NaN or signed-zero values.
8281	///
8282	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
8283	#[inline]
8284	#[target_feature(enable = "avx512fp16,avx512vl")]
8285	#[cfg_attr(test, assert_instr(vmaxph))]
8286	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8287	pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
8288	unsafe { vmaxph_128(a, b) }
8289	}
8290
8291	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8292	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8293	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8294	/// NaN or signed-zero values.
8295	///
8296	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
8297	#[inline]
8298	#[target_feature(enable = "avx512fp16,avx512vl")]
8299	#[cfg_attr(test, assert_instr(vmaxph))]
8300	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8301	pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8302	unsafe { simd_select_bitmask(m:k, yes:_mm_max_ph(a, b), no:src) }
8303	}
8304
8305	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8306	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8307	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8308	/// NaN or signed-zero values.
8309	///
8310	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
8311	#[inline]
8312	#[target_feature(enable = "avx512fp16,avx512vl")]
8313	#[cfg_attr(test, assert_instr(vmaxph))]
8314	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8315	pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8316	unsafe { simd_select_bitmask(m:k, yes:_mm_max_ph(a, b), no:_mm_setzero_ph()) }
8317	}
8318
8319	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8320	/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8321	/// value when inputs are NaN or signed-zero values.
8322	///
8323	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
8324	#[inline]
8325	#[target_feature(enable = "avx512fp16,avx512vl")]
8326	#[cfg_attr(test, assert_instr(vmaxph))]
8327	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8328	pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
8329	unsafe { vmaxph_256(a, b) }
8330	}
8331
8332	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8333	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8334	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8335	/// NaN or signed-zero values.
8336	///
8337	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
8338	#[inline]
8339	#[target_feature(enable = "avx512fp16,avx512vl")]
8340	#[cfg_attr(test, assert_instr(vmaxph))]
8341	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8342	pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8343	unsafe { simd_select_bitmask(m:k, yes:_mm256_max_ph(a, b), no:src) }
8344	}
8345
8346	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8347	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8348	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8349	/// NaN or signed-zero values.
8350	///
8351	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
8352	#[inline]
8353	#[target_feature(enable = "avx512fp16,avx512vl")]
8354	#[cfg_attr(test, assert_instr(vmaxph))]
8355	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8356	pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8357	unsafe { simd_select_bitmask(m:k, yes:_mm256_max_ph(a, b), no:_mm256_setzero_ph()) }
8358	}
8359
8360	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8361	/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8362	/// value when inputs are NaN or signed-zero values.
8363	///
8364	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
8365	#[inline]
8366	#[target_feature(enable = "avx512fp16")]
8367	#[cfg_attr(test, assert_instr(vmaxph))]
8368	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8369	pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
8370	_mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
8371	}
8372
8373	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8374	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8375	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8376	/// NaN or signed-zero values.
8377	///
8378	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
8379	#[inline]
8380	#[target_feature(enable = "avx512fp16")]
8381	#[cfg_attr(test, assert_instr(vmaxph))]
8382	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8383	pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8384	unsafe { simd_select_bitmask(m:k, yes:_mm512_max_ph(a, b), no:src) }
8385	}
8386
8387	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8388	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8389	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8390	/// NaN or signed-zero values.
8391	///
8392	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
8393	#[inline]
8394	#[target_feature(enable = "avx512fp16")]
8395	#[cfg_attr(test, assert_instr(vmaxph))]
8396	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8397	pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8398	unsafe { simd_select_bitmask(m:k, yes:_mm512_max_ph(a, b), no:_mm512_setzero_ph()) }
8399	}
8400
8401	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8402	/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8403	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8404	/// NaN or signed-zero values.
8405	///
8406	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
8407	#[inline]
8408	#[target_feature(enable = "avx512fp16")]
8409	#[cfg_attr(test, assert_instr(vmaxph, SAE = `8`))]
8410	#[rustc_legacy_const_generics(`2`)]
8411	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8412	pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
8413	unsafe {
8414	static_assert_sae!(SAE);
8415	vmaxph_512(a, b, SAE)
8416	}
8417	}
8418
8419	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8420	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8421	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8422	/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8423	///
8424	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
8425	#[inline]
8426	#[target_feature(enable = "avx512fp16")]
8427	#[cfg_attr(test, assert_instr(vmaxph, SAE = `8`))]
8428	#[rustc_legacy_const_generics(`4`)]
8429	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8430	pub fn _mm512_mask_max_round_ph<const SAE: i32>(
8431	src: __m512h,
8432	k: __mmask32,
8433	a: __m512h,
8434	b: __m512h,
8435	) -> __m512h {
8436	unsafe {
8437	static_assert_sae!(SAE);
8438	simd_select_bitmask(m:k, yes:_mm512_max_round_ph::<SAE>(a, b), no:src)
8439	}
8440	}
8441
8442	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8443	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8444	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8445	/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8446	///
8447	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
8448	#[inline]
8449	#[target_feature(enable = "avx512fp16")]
8450	#[cfg_attr(test, assert_instr(vmaxph, SAE = `8`))]
8451	#[rustc_legacy_const_generics(`3`)]
8452	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8453	pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8454	unsafe {
8455	static_assert_sae!(SAE);
8456	simd_select_bitmask(m:k, yes:_mm512_max_round_ph::<SAE>(a, b), no:_mm512_setzero_ph())
8457	}
8458	}
8459
8460	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8461	/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8462	/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
8463	/// when inputs are NaN or signed-zero values.
8464	///
8465	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
8466	#[inline]
8467	#[target_feature(enable = "avx512fp16,avx512vl")]
8468	#[cfg_attr(test, assert_instr(vmaxsh))]
8469	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8470	pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
8471	_mm_mask_max_sh(src:_mm_undefined_ph(), k:`0xff`, a, b)
8472	}
8473
8474	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8475	/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
8476	/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
8477	/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8478	///
8479	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
8480	#[inline]
8481	#[target_feature(enable = "avx512fp16,avx512vl")]
8482	#[cfg_attr(test, assert_instr(vmaxsh))]
8483	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8484	pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8485	_mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8486	}
8487
8488	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8489	/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8490	/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8491	/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8492	///
8493	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
8494	#[inline]
8495	#[target_feature(enable = "avx512fp16,avx512vl")]
8496	#[cfg_attr(test, assert_instr(vmaxsh))]
8497	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8498	pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8499	_mm_mask_max_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8500	}
8501
8502	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8503	/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8504	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8505	/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8506	///
8507	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
8508	#[inline]
8509	#[target_feature(enable = "avx512fp16,avx512vl")]
8510	#[cfg_attr(test, assert_instr(vmaxsh, SAE = `8`))]
8511	#[rustc_legacy_const_generics(`2`)]
8512	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8513	pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8514	static_assert_sae!(SAE);
8515	_mm_mask_max_round_sh::<SAE>(src:_mm_undefined_ph(), k:`0xff`, a, b)
8516	}
8517
8518	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8519	/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
8520	/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8521	/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8522	/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8523	///
8524	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
8525	#[inline]
8526	#[target_feature(enable = "avx512fp16,avx512vl")]
8527	#[cfg_attr(test, assert_instr(vmaxsh, SAE = `8`))]
8528	#[rustc_legacy_const_generics(`4`)]
8529	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8530	pub fn _mm_mask_max_round_sh<const SAE: i32>(
8531	src: __m128h,
8532	k: __mmask8,
8533	a: __m128h,
8534	b: __m128h,
8535	) -> __m128h {
8536	unsafe {
8537	static_assert_sae!(SAE);
8538	vmaxsh(a, b, src, k, SAE)
8539	}
8540	}
8541
8542	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8543	/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8544	/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8545	/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8546	/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8547	///
8548	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
8549	#[inline]
8550	#[target_feature(enable = "avx512fp16,avx512vl")]
8551	#[cfg_attr(test, assert_instr(vmaxsh, SAE = `8`))]
8552	#[rustc_legacy_const_generics(`3`)]
8553	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8554	pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8555	static_assert_sae!(SAE);
8556	_mm_mask_max_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
8557	}
8558
8559	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8560	/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8561	/// when inputs are NaN or signed-zero values.
8562	///
8563	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
8564	#[inline]
8565	#[target_feature(enable = "avx512fp16,avx512vl")]
8566	#[cfg_attr(test, assert_instr(vminph))]
8567	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8568	pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
8569	unsafe { vminph_128(a, b) }
8570	}
8571
8572	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8573	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8574	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8575	/// NaN or signed-zero values.
8576	///
8577	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
8578	#[inline]
8579	#[target_feature(enable = "avx512fp16,avx512vl")]
8580	#[cfg_attr(test, assert_instr(vminph))]
8581	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8582	pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8583	unsafe { simd_select_bitmask(m:k, yes:_mm_min_ph(a, b), no:src) }
8584	}
8585
8586	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8587	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8588	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8589	/// NaN or signed-zero values.
8590	///
8591	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
8592	#[inline]
8593	#[target_feature(enable = "avx512fp16,avx512vl")]
8594	#[cfg_attr(test, assert_instr(vminph))]
8595	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8596	pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8597	unsafe { simd_select_bitmask(m:k, yes:_mm_min_ph(a, b), no:_mm_setzero_ph()) }
8598	}
8599
8600	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8601	/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8602	/// when inputs are NaN or signed-zero values.
8603	///
8604	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
8605	#[inline]
8606	#[target_feature(enable = "avx512fp16,avx512vl")]
8607	#[cfg_attr(test, assert_instr(vminph))]
8608	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8609	pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
8610	unsafe { vminph_256(a, b) }
8611	}
8612
8613	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8614	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8615	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8616	/// NaN or signed-zero values.
8617	///
8618	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
8619	#[inline]
8620	#[target_feature(enable = "avx512fp16,avx512vl")]
8621	#[cfg_attr(test, assert_instr(vminph))]
8622	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8623	pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8624	unsafe { simd_select_bitmask(m:k, yes:_mm256_min_ph(a, b), no:src) }
8625	}
8626
8627	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8628	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8629	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8630	/// NaN or signed-zero values.
8631	///
8632	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
8633	#[inline]
8634	#[target_feature(enable = "avx512fp16,avx512vl")]
8635	#[cfg_attr(test, assert_instr(vminph))]
8636	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8637	pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8638	unsafe { simd_select_bitmask(m:k, yes:_mm256_min_ph(a, b), no:_mm256_setzero_ph()) }
8639	}
8640
8641	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8642	/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8643	/// when inputs are NaN or signed-zero values.
8644	///
8645	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
8646	#[inline]
8647	#[target_feature(enable = "avx512fp16")]
8648	#[cfg_attr(test, assert_instr(vminph))]
8649	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8650	pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
8651	_mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
8652	}
8653
8654	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8655	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8656	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8657	/// NaN or signed-zero values.
8658	///
8659	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
8660	#[inline]
8661	#[target_feature(enable = "avx512fp16")]
8662	#[cfg_attr(test, assert_instr(vminph))]
8663	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8664	pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8665	unsafe { simd_select_bitmask(m:k, yes:_mm512_min_ph(a, b), no:src) }
8666	}
8667
8668	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8669	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8670	/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8671	/// NaN or signed-zero values.
8672	///
8673	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
8674	#[inline]
8675	#[target_feature(enable = "avx512fp16")]
8676	#[cfg_attr(test, assert_instr(vminph))]
8677	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8678	pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8679	unsafe { simd_select_bitmask(m:k, yes:_mm512_min_ph(a, b), no:_mm512_setzero_ph()) }
8680	}
8681
8682	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8683	/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not
8684	/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8685	///
8686	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
8687	#[inline]
8688	#[target_feature(enable = "avx512fp16")]
8689	#[cfg_attr(test, assert_instr(vminph, SAE = `8`))]
8690	#[rustc_legacy_const_generics(`2`)]
8691	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8692	pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
8693	unsafe {
8694	static_assert_sae!(SAE);
8695	vminph_512(a, b, SAE)
8696	}
8697	}
8698
8699	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8700	/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8701	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8702	/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8703	///
8704	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
8705	#[inline]
8706	#[target_feature(enable = "avx512fp16")]
8707	#[cfg_attr(test, assert_instr(vminph, SAE = `8`))]
8708	#[rustc_legacy_const_generics(`4`)]
8709	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8710	pub fn _mm512_mask_min_round_ph<const SAE: i32>(
8711	src: __m512h,
8712	k: __mmask32,
8713	a: __m512h,
8714	b: __m512h,
8715	) -> __m512h {
8716	unsafe {
8717	static_assert_sae!(SAE);
8718	simd_select_bitmask(m:k, yes:_mm512_min_round_ph::<SAE>(a, b), no:src)
8719	}
8720	}
8721
8722	/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8723	/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8724	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8725	/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8726	///
8727	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
8728	#[inline]
8729	#[target_feature(enable = "avx512fp16")]
8730	#[cfg_attr(test, assert_instr(vminph, SAE = `8`))]
8731	#[rustc_legacy_const_generics(`3`)]
8732	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8733	pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8734	unsafe {
8735	static_assert_sae!(SAE);
8736	simd_select_bitmask(m:k, yes:_mm512_min_round_ph::<SAE>(a, b), no:_mm512_setzero_ph())
8737	}
8738	}
8739
8740	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8741	/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8742	/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when
8743	/// inputs are NaN or signed-zero values.
8744	///
8745	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
8746	#[inline]
8747	#[target_feature(enable = "avx512fp16,avx512vl")]
8748	#[cfg_attr(test, assert_instr(vminsh))]
8749	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8750	pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
8751	_mm_mask_min_sh(src:_mm_undefined_ph(), k:`0xff`, a, b)
8752	}
8753
8754	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8755	/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
8756	/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
8757	/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8758	///
8759	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
8760	#[inline]
8761	#[target_feature(enable = "avx512fp16,avx512vl")]
8762	#[cfg_attr(test, assert_instr(vminsh))]
8763	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8764	pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8765	_mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8766	}
8767
8768	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8769	/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8770	/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8771	/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8772	///
8773	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
8774	#[inline]
8775	#[target_feature(enable = "avx512fp16,avx512vl")]
8776	#[cfg_attr(test, assert_instr(vminsh))]
8777	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8778	pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8779	_mm_mask_min_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8780	}
8781
8782	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8783	/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8784	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8785	/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8786	///
8787	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
8788	#[inline]
8789	#[target_feature(enable = "avx512fp16,avx512vl")]
8790	#[cfg_attr(test, assert_instr(vminsh, SAE = `8`))]
8791	#[rustc_legacy_const_generics(`2`)]
8792	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8793	pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8794	static_assert_sae!(SAE);
8795	_mm_mask_min_round_sh::<SAE>(src:_mm_undefined_ph(), k:`0xff`, a, b)
8796	}
8797
8798	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8799	/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
8800	/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8801	/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8802	/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8803	///
8804	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
8805	#[inline]
8806	#[target_feature(enable = "avx512fp16,avx512vl")]
8807	#[cfg_attr(test, assert_instr(vminsh, SAE = `8`))]
8808	#[rustc_legacy_const_generics(`4`)]
8809	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8810	pub fn _mm_mask_min_round_sh<const SAE: i32>(
8811	src: __m128h,
8812	k: __mmask8,
8813	a: __m128h,
8814	b: __m128h,
8815	) -> __m128h {
8816	unsafe {
8817	static_assert_sae!(SAE);
8818	vminsh(a, b, src, k, SAE)
8819	}
8820	}
8821
8822	/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8823	/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8824	/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8825	/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8826	/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8827	///
8828	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
8829	#[inline]
8830	#[target_feature(enable = "avx512fp16,avx512vl")]
8831	#[cfg_attr(test, assert_instr(vminsh, SAE = `8`))]
8832	#[rustc_legacy_const_generics(`3`)]
8833	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8834	pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8835	static_assert_sae!(SAE);
8836	_mm_mask_min_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
8837	}
8838
8839	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8840	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8841	/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8842	///
8843	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
8844	#[inline]
8845	#[target_feature(enable = "avx512fp16,avx512vl")]
8846	#[cfg_attr(test, assert_instr(vgetexpph))]
8847	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8848	pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
8849	_mm_mask_getexp_ph(src:_mm_undefined_ph(), k:`0xff`, a)
8850	}
8851
8852	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8853	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8854	/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8855	/// `floor(log2(x))` for each element.
8856	///
8857	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
8858	#[inline]
8859	#[target_feature(enable = "avx512fp16,avx512vl")]
8860	#[cfg_attr(test, assert_instr(vgetexpph))]
8861	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8862	pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
8863	unsafe { vgetexpph_128(a, src, k) }
8864	}
8865
8866	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8867	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8868	/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8869	/// `floor(log2(x))` for each element.
8870	///
8871	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
8872	#[inline]
8873	#[target_feature(enable = "avx512fp16,avx512vl")]
8874	#[cfg_attr(test, assert_instr(vgetexpph))]
8875	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8876	pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
8877	_mm_mask_getexp_ph(src:_mm_setzero_ph(), k, a)
8878	}
8879
8880	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8881	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8882	/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8883	///
8884	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
8885	#[inline]
8886	#[target_feature(enable = "avx512fp16,avx512vl")]
8887	#[cfg_attr(test, assert_instr(vgetexpph))]
8888	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8889	pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
8890	_mm256_mask_getexp_ph(src:_mm256_undefined_ph(), k:`0xffff`, a)
8891	}
8892
8893	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8894	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8895	/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8896	/// `floor(log2(x))` for each element.
8897	///
8898	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
8899	#[inline]
8900	#[target_feature(enable = "avx512fp16,avx512vl")]
8901	#[cfg_attr(test, assert_instr(vgetexpph))]
8902	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8903	pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8904	unsafe { vgetexpph_256(a, src, k) }
8905	}
8906
8907	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8908	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8909	/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8910	/// `floor(log2(x))` for each element.
8911	///
8912	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
8913	#[inline]
8914	#[target_feature(enable = "avx512fp16,avx512vl")]
8915	#[cfg_attr(test, assert_instr(vgetexpph))]
8916	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8917	pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
8918	_mm256_mask_getexp_ph(src:_mm256_setzero_ph(), k, a)
8919	}
8920
8921	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8922	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8923	/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8924	///
8925	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
8926	#[inline]
8927	#[target_feature(enable = "avx512fp16")]
8928	#[cfg_attr(test, assert_instr(vgetexpph))]
8929	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8930	pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
8931	_mm512_mask_getexp_ph(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
8932	}
8933
8934	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8935	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8936	/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8937	/// `floor(log2(x))` for each element.
8938	///
8939	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
8940	#[inline]
8941	#[target_feature(enable = "avx512fp16")]
8942	#[cfg_attr(test, assert_instr(vgetexpph))]
8943	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8944	pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8945	_mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
8946	}
8947
8948	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8949	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8950	/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8951	/// `floor(log2(x))` for each element.
8952	///
8953	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
8954	#[inline]
8955	#[target_feature(enable = "avx512fp16")]
8956	#[cfg_attr(test, assert_instr(vgetexpph))]
8957	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8958	pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
8959	_mm512_mask_getexp_ph(src:_mm512_setzero_ph(), k, a)
8960	}
8961
8962	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8963	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8964	/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
8965	/// by passing _MM_FROUND_NO_EXC in the sae parameter
8966	///
8967	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
8968	#[inline]
8969	#[target_feature(enable = "avx512fp16")]
8970	#[cfg_attr(test, assert_instr(vgetexpph, SAE = `8`))]
8971	#[rustc_legacy_const_generics(`1`)]
8972	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8973	pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
8974	static_assert_sae!(SAE);
8975	_mm512_mask_getexp_round_ph::<SAE>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
8976	}
8977
8978	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8979	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8980	/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8981	/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
8982	///
8983	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
8984	#[inline]
8985	#[target_feature(enable = "avx512fp16")]
8986	#[cfg_attr(test, assert_instr(vgetexpph, SAE = `8`))]
8987	#[rustc_legacy_const_generics(`3`)]
8988	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8989	pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
8990	src: __m512h,
8991	k: __mmask32,
8992	a: __m512h,
8993	) -> __m512h {
8994	unsafe {
8995	static_assert_sae!(SAE);
8996	vgetexpph_512(a, src, k, SAE)
8997	}
8998	}
8999
9000	/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9001	/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
9002	/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
9003	/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9004	///
9005	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
9006	#[inline]
9007	#[target_feature(enable = "avx512fp16")]
9008	#[cfg_attr(test, assert_instr(vgetexpph, SAE = `8`))]
9009	#[rustc_legacy_const_generics(`2`)]
9010	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9011	pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
9012	static_assert_sae!(SAE);
9013	_mm512_mask_getexp_round_ph::<SAE>(src:_mm512_setzero_ph(), k, a)
9014	}
9015
9016	/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9017	/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9018	/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9019	/// calculates `floor(log2(x))` for the lower element.
9020	///
9021	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
9022	#[inline]
9023	#[target_feature(enable = "avx512fp16")]
9024	#[cfg_attr(test, assert_instr(vgetexpsh))]
9025	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9026	pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
9027	_mm_mask_getexp_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
9028	}
9029
9030	/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9031	/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9032	/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9033	/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9034	/// for the lower element.
9035	///
9036	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
9037	#[inline]
9038	#[target_feature(enable = "avx512fp16")]
9039	#[cfg_attr(test, assert_instr(vgetexpsh))]
9040	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9041	pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9042	_mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9043	}
9044
9045	/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9046	/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9047	/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9048	/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9049	/// lower element.
9050	///
9051	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
9052	#[inline]
9053	#[target_feature(enable = "avx512fp16")]
9054	#[cfg_attr(test, assert_instr(vgetexpsh))]
9055	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9056	pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9057	_mm_mask_getexp_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
9058	}
9059
9060	/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9061	/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9062	/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9063	/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9064	/// in the sae parameter
9065	///
9066	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
9067	#[inline]
9068	#[target_feature(enable = "avx512fp16")]
9069	#[cfg_attr(test, assert_instr(vgetexpsh, SAE = `8`))]
9070	#[rustc_legacy_const_generics(`2`)]
9071	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9072	pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
9073	static_assert_sae!(SAE);
9074	_mm_mask_getexp_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
9075	}
9076
9077	/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9078	/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9079	/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9080	/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9081	/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9082	///
9083	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
9084	#[inline]
9085	#[target_feature(enable = "avx512fp16")]
9086	#[cfg_attr(test, assert_instr(vgetexpsh, SAE = `8`))]
9087	#[rustc_legacy_const_generics(`4`)]
9088	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9089	pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
9090	src: __m128h,
9091	k: __mmask8,
9092	a: __m128h,
9093	b: __m128h,
9094	) -> __m128h {
9095	unsafe {
9096	static_assert_sae!(SAE);
9097	vgetexpsh(a, b, src, k, SAE)
9098	}
9099	}
9100
9101	/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9102	/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9103	/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9104	/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9105	/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9106	///
9107	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
9108	#[inline]
9109	#[target_feature(enable = "avx512fp16")]
9110	#[cfg_attr(test, assert_instr(vgetexpsh, SAE = `8`))]
9111	#[rustc_legacy_const_generics(`3`)]
9112	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9113	pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9114	static_assert_sae!(SAE);
9115	_mm_mask_getexp_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
9116	}
9117
9118	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9119	/// the results in dst. This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends*
9120	/// on the interval range defined by norm and the sign depends on sign and the source sign.
9121	///
9122	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9123	///
9124	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9125	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9126	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9127	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9128	///
9129	/// The sign is determined by sc which can take the following values:
9130	///
9131	/// _MM_MANT_SIGN_src // sign = sign(src)
9132	/// _MM_MANT_SIGN_zero // sign = 0
9133	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9134	///
9135	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
9136	#[inline]
9137	#[target_feature(enable = "avx512fp16,avx512vl")]
9138	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9139	#[rustc_legacy_const_generics(`1`, `2`)]
9140	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9141	pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9142	a: __m128h,
9143	) -> __m128h {
9144	static_assert_uimm_bits!(NORM, `4`);
9145	static_assert_uimm_bits!(SIGN, `2`);
9146	_mm_mask_getmant_ph::<NORM, SIGN>(src:_mm_undefined_ph(), k:`0xff`, a)
9147	}
9148
9149	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9150	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9151	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9152	/// by norm and the sign depends on sign and the source sign.
9153	///
9154	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9155	///
9156	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9157	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9158	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9159	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9160	///
9161	/// The sign is determined by sc which can take the following values:
9162	///
9163	/// _MM_MANT_SIGN_src // sign = sign(src)
9164	/// _MM_MANT_SIGN_zero // sign = 0
9165	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9166	///
9167	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
9168	#[inline]
9169	#[target_feature(enable = "avx512fp16,avx512vl")]
9170	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9171	#[rustc_legacy_const_generics(`3`, `4`)]
9172	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9173	pub fn _mm_mask_getmant_ph<
9174	const NORM: _MM_MANTISSA_NORM_ENUM,
9175	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9176	>(
9177	src: __m128h,
9178	k: __mmask8,
9179	a: __m128h,
9180	) -> __m128h {
9181	unsafe {
9182	static_assert_uimm_bits!(NORM, `4`);
9183	static_assert_uimm_bits!(SIGN, `2`);
9184	vgetmantph_128(a, (SIGN << `2`) \| NORM, src, k)
9185	}
9186	}
9187
9188	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9189	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9190	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9191	/// by norm and the sign depends on sign and the source sign.
9192	///
9193	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9194	///
9195	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9196	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9197	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9198	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9199	///
9200	/// The sign is determined by sc which can take the following values:
9201	///
9202	/// _MM_MANT_SIGN_src // sign = sign(src)
9203	/// _MM_MANT_SIGN_zero // sign = 0
9204	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9205	///
9206	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
9207	#[inline]
9208	#[target_feature(enable = "avx512fp16,avx512vl")]
9209	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9210	#[rustc_legacy_const_generics(`2`, `3`)]
9211	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9212	pub fn _mm_maskz_getmant_ph<
9213	const NORM: _MM_MANTISSA_NORM_ENUM,
9214	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9215	>(
9216	k: __mmask8,
9217	a: __m128h,
9218	) -> __m128h {
9219	static_assert_uimm_bits!(NORM, `4`);
9220	static_assert_uimm_bits!(SIGN, `2`);
9221	_mm_mask_getmant_ph::<NORM, SIGN>(src:_mm_setzero_ph(), k, a)
9222	}
9223
9224	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9225	/// the results in dst. This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends*
9226	/// on the interval range defined by norm and the sign depends on sign and the source sign.
9227	///
9228	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9229	///
9230	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9231	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9232	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9233	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9234	///
9235	/// The sign is determined by sc which can take the following values:
9236	///
9237	/// _MM_MANT_SIGN_src // sign = sign(src)
9238	/// _MM_MANT_SIGN_zero // sign = 0
9239	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9240	///
9241	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
9242	#[inline]
9243	#[target_feature(enable = "avx512fp16,avx512vl")]
9244	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9245	#[rustc_legacy_const_generics(`1`, `2`)]
9246	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9247	pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9248	a: __m256h,
9249	) -> __m256h {
9250	static_assert_uimm_bits!(NORM, `4`);
9251	static_assert_uimm_bits!(SIGN, `2`);
9252	_mm256_mask_getmant_ph::<NORM, SIGN>(src:_mm256_undefined_ph(), k:`0xffff`, a)
9253	}
9254
9255	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9256	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9257	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9258	/// by norm and the sign depends on sign and the source sign.
9259	///
9260	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9261	///
9262	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9263	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9264	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9265	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9266	///
9267	/// The sign is determined by sc which can take the following values:
9268	///
9269	/// _MM_MANT_SIGN_src // sign = sign(src)
9270	/// _MM_MANT_SIGN_zero // sign = 0
9271	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9272	///
9273	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
9274	#[inline]
9275	#[target_feature(enable = "avx512fp16,avx512vl")]
9276	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9277	#[rustc_legacy_const_generics(`3`, `4`)]
9278	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9279	pub fn _mm256_mask_getmant_ph<
9280	const NORM: _MM_MANTISSA_NORM_ENUM,
9281	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9282	>(
9283	src: __m256h,
9284	k: __mmask16,
9285	a: __m256h,
9286	) -> __m256h {
9287	unsafe {
9288	static_assert_uimm_bits!(NORM, `4`);
9289	static_assert_uimm_bits!(SIGN, `2`);
9290	vgetmantph_256(a, (SIGN << `2`) \| NORM, src, k)
9291	}
9292	}
9293
9294	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9295	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9296	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9297	/// by norm and the sign depends on sign and the source sign.
9298	///
9299	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9300	///
9301	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9302	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9303	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9304	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9305	///
9306	/// The sign is determined by sc which can take the following values:
9307	///
9308	/// _MM_MANT_SIGN_src // sign = sign(src)
9309	/// _MM_MANT_SIGN_zero // sign = 0
9310	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9311	///
9312	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
9313	#[inline]
9314	#[target_feature(enable = "avx512fp16,avx512vl")]
9315	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9316	#[rustc_legacy_const_generics(`2`, `3`)]
9317	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9318	pub fn _mm256_maskz_getmant_ph<
9319	const NORM: _MM_MANTISSA_NORM_ENUM,
9320	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9321	>(
9322	k: __mmask16,
9323	a: __m256h,
9324	) -> __m256h {
9325	static_assert_uimm_bits!(NORM, `4`);
9326	static_assert_uimm_bits!(SIGN, `2`);
9327	_mm256_mask_getmant_ph::<NORM, SIGN>(src:_mm256_setzero_ph(), k, a)
9328	}
9329
9330	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9331	/// the results in dst. This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends*
9332	/// on the interval range defined by norm and the sign depends on sign and the source sign.
9333	///
9334	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9335	///
9336	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9337	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9338	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9339	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9340	///
9341	/// The sign is determined by sc which can take the following values:
9342	///
9343	/// _MM_MANT_SIGN_src // sign = sign(src)
9344	/// _MM_MANT_SIGN_zero // sign = 0
9345	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9346	///
9347	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
9348	#[inline]
9349	#[target_feature(enable = "avx512fp16")]
9350	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9351	#[rustc_legacy_const_generics(`1`, `2`)]
9352	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9353	pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9354	a: __m512h,
9355	) -> __m512h {
9356	static_assert_uimm_bits!(NORM, `4`);
9357	static_assert_uimm_bits!(SIGN, `2`);
9358	_mm512_mask_getmant_ph::<NORM, SIGN>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
9359	}
9360
9361	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9362	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9363	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9364	/// by norm and the sign depends on sign and the source sign.
9365	///
9366	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9367	///
9368	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9369	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9370	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9371	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9372	///
9373	/// The sign is determined by sc which can take the following values:
9374	///
9375	/// _MM_MANT_SIGN_src // sign = sign(src)
9376	/// _MM_MANT_SIGN_zero // sign = 0
9377	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9378	///
9379	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
9380	#[inline]
9381	#[target_feature(enable = "avx512fp16")]
9382	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9383	#[rustc_legacy_const_generics(`3`, `4`)]
9384	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9385	pub fn _mm512_mask_getmant_ph<
9386	const NORM: _MM_MANTISSA_NORM_ENUM,
9387	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9388	>(
9389	src: __m512h,
9390	k: __mmask32,
9391	a: __m512h,
9392	) -> __m512h {
9393	static_assert_uimm_bits!(NORM, `4`);
9394	static_assert_uimm_bits!(SIGN, `2`);
9395	_mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
9396	}
9397
9398	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9399	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9400	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9401	/// by norm and the sign depends on sign and the source sign.
9402	///
9403	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9404	///
9405	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9406	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9407	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9408	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9409	///
9410	/// The sign is determined by sc which can take the following values:
9411	///
9412	/// _MM_MANT_SIGN_src // sign = sign(src)
9413	/// _MM_MANT_SIGN_zero // sign = 0
9414	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9415	///
9416	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
9417	#[inline]
9418	#[target_feature(enable = "avx512fp16")]
9419	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`))]
9420	#[rustc_legacy_const_generics(`2`, `3`)]
9421	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9422	pub fn _mm512_maskz_getmant_ph<
9423	const NORM: _MM_MANTISSA_NORM_ENUM,
9424	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9425	>(
9426	k: __mmask32,
9427	a: __m512h,
9428	) -> __m512h {
9429	static_assert_uimm_bits!(NORM, `4`);
9430	static_assert_uimm_bits!(SIGN, `2`);
9431	_mm512_mask_getmant_ph::<NORM, SIGN>(src:_mm512_setzero_ph(), k, a)
9432	}
9433
9434	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9435	/// the results in dst. This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends*
9436	/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9437	/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9438	///
9439	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9440	///
9441	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9442	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9443	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9444	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9445	///
9446	/// The sign is determined by sc which can take the following values:
9447	///
9448	/// _MM_MANT_SIGN_src // sign = sign(src)
9449	/// _MM_MANT_SIGN_zero // sign = 0
9450	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9451	///
9452	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9453	///
9454	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
9455	#[inline]
9456	#[target_feature(enable = "avx512fp16")]
9457	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`, SAE = `8`))]
9458	#[rustc_legacy_const_generics(`1`, `2`, `3`)]
9459	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9460	pub fn _mm512_getmant_round_ph<
9461	const NORM: _MM_MANTISSA_NORM_ENUM,
9462	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9463	const SAE: i32,
9464	>(
9465	a: __m512h,
9466	) -> __m512h {
9467	static_assert_uimm_bits!(NORM, `4`);
9468	static_assert_uimm_bits!(SIGN, `2`);
9469	static_assert_sae!(SAE);
9470	_mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
9471	}
9472
9473	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9474	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9475	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9476	/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9477	/// in the sae parameter
9478	///
9479	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9480	///
9481	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9482	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9483	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9484	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9485	///
9486	/// The sign is determined by sc which can take the following values:
9487	///
9488	/// _MM_MANT_SIGN_src // sign = sign(src)
9489	/// _MM_MANT_SIGN_zero // sign = 0
9490	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9491	///
9492	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9493	///
9494	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
9495	#[inline]
9496	#[target_feature(enable = "avx512fp16")]
9497	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`, SAE = `8`))]
9498	#[rustc_legacy_const_generics(`3`, `4`, `5`)]
9499	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9500	pub fn _mm512_mask_getmant_round_ph<
9501	const NORM: _MM_MANTISSA_NORM_ENUM,
9502	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9503	const SAE: i32,
9504	>(
9505	src: __m512h,
9506	k: __mmask32,
9507	a: __m512h,
9508	) -> __m512h {
9509	unsafe {
9510	static_assert_uimm_bits!(NORM, `4`);
9511	static_assert_uimm_bits!(SIGN, `2`);
9512	static_assert_sae!(SAE);
9513	vgetmantph_512(a, (SIGN << `2`) \| NORM, src, k, SAE)
9514	}
9515	}
9516
9517	/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9518	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9519	/// This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends on the interval range defined*
9520	/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9521	/// in the sae parameter
9522	///
9523	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9524	///
9525	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9526	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9527	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9528	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9529	///
9530	/// The sign is determined by sc which can take the following values:
9531	///
9532	/// _MM_MANT_SIGN_src // sign = sign(src)
9533	/// _MM_MANT_SIGN_zero // sign = 0
9534	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9535	///
9536	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9537	///
9538	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
9539	#[inline]
9540	#[target_feature(enable = "avx512fp16")]
9541	#[cfg_attr(test, assert_instr(vgetmantph, NORM = `0`, SIGN = `0`, SAE = `8`))]
9542	#[rustc_legacy_const_generics(`2`, `3`, `4`)]
9543	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9544	pub fn _mm512_maskz_getmant_round_ph<
9545	const NORM: _MM_MANTISSA_NORM_ENUM,
9546	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9547	const SAE: i32,
9548	>(
9549	k: __mmask32,
9550	a: __m512h,
9551	) -> __m512h {
9552	static_assert_uimm_bits!(NORM, `4`);
9553	static_assert_uimm_bits!(SIGN, `2`);
9554	static_assert_sae!(SAE);
9555	_mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(src:_mm512_setzero_ph(), k, a)
9556	}
9557
9558	/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9559	/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9560	/// elements of dst. This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends*
9561	/// on the interval range defined by norm and the sign depends on sign and the source sign.
9562	///
9563	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9564	///
9565	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9566	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9567	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9568	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9569	///
9570	/// The sign is determined by sc which can take the following values:
9571	///
9572	/// _MM_MANT_SIGN_src // sign = sign(src)
9573	/// _MM_MANT_SIGN_zero // sign = 0
9574	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9575	///
9576	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
9577	#[inline]
9578	#[target_feature(enable = "avx512fp16")]
9579	#[cfg_attr(test, assert_instr(vgetmantsh, NORM = `0`, SIGN = `0`))]
9580	#[rustc_legacy_const_generics(`2`, `3`)]
9581	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9582	pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9583	a: __m128h,
9584	b: __m128h,
9585	) -> __m128h {
9586	static_assert_uimm_bits!(NORM, `4`);
9587	static_assert_uimm_bits!(SIGN, `2`);
9588	_mm_mask_getmant_sh::<NORM, SIGN>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
9589	}
9590
9591	/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9592	/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9593	/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9594	/// `±(2^k)\|x.significand\|`, where k depends on the interval range defined by norm and the sign depends on sign and*
9595	/// the source sign.
9596	///
9597	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9598	///
9599	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9600	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9601	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9602	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9603	///
9604	/// The sign is determined by sc which can take the following values:
9605	///
9606	/// _MM_MANT_SIGN_src // sign = sign(src)
9607	/// _MM_MANT_SIGN_zero // sign = 0
9608	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9609	///
9610	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
9611	#[inline]
9612	#[target_feature(enable = "avx512fp16")]
9613	#[cfg_attr(test, assert_instr(vgetmantsh, NORM = `0`, SIGN = `0`))]
9614	#[rustc_legacy_const_generics(`4`, `5`)]
9615	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9616	pub fn _mm_mask_getmant_sh<
9617	const NORM: _MM_MANTISSA_NORM_ENUM,
9618	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9619	>(
9620	src: __m128h,
9621	k: __mmask8,
9622	a: __m128h,
9623	b: __m128h,
9624	) -> __m128h {
9625	static_assert_uimm_bits!(NORM, `4`);
9626	static_assert_uimm_bits!(SIGN, `2`);
9627	_mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9628	}
9629
9630	/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9631	/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9632	/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9633	/// `±(2^k)\|x.significand\|`, where k depends on the interval range defined by norm and the sign depends on sign and*
9634	/// the source sign.
9635	///
9636	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9637	///
9638	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9639	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9640	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9641	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9642	///
9643	/// The sign is determined by sc which can take the following values:
9644	///
9645	/// _MM_MANT_SIGN_src // sign = sign(src)
9646	/// _MM_MANT_SIGN_zero // sign = 0
9647	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9648	///
9649	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
9650	#[inline]
9651	#[target_feature(enable = "avx512fp16")]
9652	#[cfg_attr(test, assert_instr(vgetmantsh, NORM = `0`, SIGN = `0`))]
9653	#[rustc_legacy_const_generics(`3`, `4`)]
9654	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9655	pub fn _mm_maskz_getmant_sh<
9656	const NORM: _MM_MANTISSA_NORM_ENUM,
9657	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9658	>(
9659	k: __mmask8,
9660	a: __m128h,
9661	b: __m128h,
9662	) -> __m128h {
9663	static_assert_uimm_bits!(NORM, `4`);
9664	static_assert_uimm_bits!(SIGN, `2`);
9665	_mm_mask_getmant_sh::<NORM, SIGN>(src:f16x8::ZERO.as_m128h(), k, a, b)
9666	}
9667
9668	/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9669	/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9670	/// elements of dst. This intrinsic essentially calculates `±(2^k)\|x.significand\|`, where k depends*
9671	/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9672	/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9673	///
9674	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9675	///
9676	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9677	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9678	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9679	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9680	///
9681	/// The sign is determined by sc which can take the following values:
9682	///
9683	/// _MM_MANT_SIGN_src // sign = sign(src)
9684	/// _MM_MANT_SIGN_zero // sign = 0
9685	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9686	///
9687	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9688	///
9689	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
9690	#[inline]
9691	#[target_feature(enable = "avx512fp16")]
9692	#[cfg_attr(test, assert_instr(vgetmantsh, NORM = `0`, SIGN = `0`, SAE = `8`))]
9693	#[rustc_legacy_const_generics(`2`, `3`, `4`)]
9694	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9695	pub fn _mm_getmant_round_sh<
9696	const NORM: _MM_MANTISSA_NORM_ENUM,
9697	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9698	const SAE: i32,
9699	>(
9700	a: __m128h,
9701	b: __m128h,
9702	) -> __m128h {
9703	static_assert_uimm_bits!(NORM, `4`);
9704	static_assert_uimm_bits!(SIGN, `2`);
9705	static_assert_sae!(SAE);
9706	_mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
9707	}
9708
9709	/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9710	/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9711	/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9712	/// `±(2^k)\|x.significand\|`, where k depends on the interval range defined by norm and the sign depends on sign and*
9713	/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9714	///
9715	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9716	///
9717	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9718	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9719	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9720	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9721	///
9722	/// The sign is determined by sc which can take the following values:
9723	///
9724	/// _MM_MANT_SIGN_src // sign = sign(src)
9725	/// _MM_MANT_SIGN_zero // sign = 0
9726	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9727	///
9728	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9729	///
9730	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
9731	#[inline]
9732	#[target_feature(enable = "avx512fp16")]
9733	#[cfg_attr(test, assert_instr(vgetmantsh, NORM = `0`, SIGN = `0`, SAE = `8`))]
9734	#[rustc_legacy_const_generics(`4`, `5`, `6`)]
9735	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9736	pub fn _mm_mask_getmant_round_sh<
9737	const NORM: _MM_MANTISSA_NORM_ENUM,
9738	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9739	const SAE: i32,
9740	>(
9741	src: __m128h,
9742	k: __mmask8,
9743	a: __m128h,
9744	b: __m128h,
9745	) -> __m128h {
9746	unsafe {
9747	static_assert_uimm_bits!(NORM, `4`);
9748	static_assert_uimm_bits!(SIGN, `2`);
9749	static_assert_sae!(SAE);
9750	vgetmantsh(a, b, (SIGN << `2`) \| NORM, src, k, SAE)
9751	}
9752	}
9753
9754	/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9755	/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9756	/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9757	/// `±(2^k)\|x.significand\|`, where k depends on the interval range defined by norm and the sign depends on sign and*
9758	/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9759	///
9760	/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9761	///
9762	/// _MM_MANT_NORM_1_2 // interval [1, 2)
9763	/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9764	/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9765	/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9766	///
9767	/// The sign is determined by sc which can take the following values:
9768	///
9769	/// _MM_MANT_SIGN_src // sign = sign(src)
9770	/// _MM_MANT_SIGN_zero // sign = 0
9771	/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9772	///
9773	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9774	///
9775	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
9776	#[inline]
9777	#[target_feature(enable = "avx512fp16")]
9778	#[cfg_attr(test, assert_instr(vgetmantsh, NORM = `0`, SIGN = `0`, SAE = `8`))]
9779	#[rustc_legacy_const_generics(`3`, `4`, `5`)]
9780	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9781	pub fn _mm_maskz_getmant_round_sh<
9782	const NORM: _MM_MANTISSA_NORM_ENUM,
9783	const SIGN: _MM_MANTISSA_SIGN_ENUM,
9784	const SAE: i32,
9785	>(
9786	k: __mmask8,
9787	a: __m128h,
9788	b: __m128h,
9789	) -> __m128h {
9790	static_assert_uimm_bits!(NORM, `4`);
9791	static_assert_uimm_bits!(SIGN, `2`);
9792	static_assert_sae!(SAE);
9793	_mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
9794	}
9795
9796	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9797	/// specified by imm8, and store the results in dst.
9798	///
9799	/// Rounding is done according to the imm8 parameter, which can be one of:
9800	///
9801	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9802	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9803	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9804	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9805	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9806	///
9807	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
9808	#[inline]
9809	#[target_feature(enable = "avx512fp16,avx512vl")]
9810	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9811	#[rustc_legacy_const_generics(`1`)]
9812	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9813	pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
9814	static_assert_uimm_bits!(IMM8, `8`);
9815	_mm_mask_roundscale_ph::<IMM8>(src:_mm_undefined_ph(), k:`0xff`, a)
9816	}
9817
9818	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9819	/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9820	/// the corresponding mask bit is not set).
9821	///
9822	/// Rounding is done according to the imm8 parameter, which can be one of:
9823	///
9824	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9825	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9826	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9827	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9828	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9829	///
9830	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
9831	#[inline]
9832	#[target_feature(enable = "avx512fp16,avx512vl")]
9833	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9834	#[rustc_legacy_const_generics(`3`)]
9835	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9836	pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
9837	unsafe {
9838	static_assert_uimm_bits!(IMM8, `8`);
9839	vrndscaleph_128(a, IMM8, src, k)
9840	}
9841	}
9842
9843	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9844	/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9845	/// mask bit is not set).
9846	///
9847	/// Rounding is done according to the imm8 parameter, which can be one of:
9848	///
9849	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9850	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9851	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9852	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9853	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9854	///
9855	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
9856	#[inline]
9857	#[target_feature(enable = "avx512fp16,avx512vl")]
9858	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9859	#[rustc_legacy_const_generics(`2`)]
9860	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9861	pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
9862	static_assert_uimm_bits!(IMM8, `8`);
9863	_mm_mask_roundscale_ph::<IMM8>(src:_mm_setzero_ph(), k, a)
9864	}
9865
9866	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9867	/// specified by imm8, and store the results in dst.
9868	///
9869	/// Rounding is done according to the imm8 parameter, which can be one of:
9870	///
9871	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9872	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9873	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9874	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9875	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9876	///
9877	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
9878	#[inline]
9879	#[target_feature(enable = "avx512fp16,avx512vl")]
9880	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9881	#[rustc_legacy_const_generics(`1`)]
9882	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9883	pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
9884	static_assert_uimm_bits!(IMM8, `8`);
9885	_mm256_mask_roundscale_ph::<IMM8>(src:_mm256_undefined_ph(), k:`0xffff`, a)
9886	}
9887
9888	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9889	/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9890	/// the corresponding mask bit is not set).
9891	///
9892	/// Rounding is done according to the imm8 parameter, which can be one of:
9893	///
9894	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9895	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9896	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9897	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9898	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9899	///
9900	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
9901	#[inline]
9902	#[target_feature(enable = "avx512fp16,avx512vl")]
9903	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9904	#[rustc_legacy_const_generics(`3`)]
9905	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9906	pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
9907	src: __m256h,
9908	k: __mmask16,
9909	a: __m256h,
9910	) -> __m256h {
9911	unsafe {
9912	static_assert_uimm_bits!(IMM8, `8`);
9913	vrndscaleph_256(a, IMM8, src, k)
9914	}
9915	}
9916
9917	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9918	/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9919	/// mask bit is not set).
9920	///
9921	/// Rounding is done according to the imm8 parameter, which can be one of:
9922	///
9923	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9924	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9925	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9926	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9927	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9928	///
9929	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
9930	#[inline]
9931	#[target_feature(enable = "avx512fp16,avx512vl")]
9932	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9933	#[rustc_legacy_const_generics(`2`)]
9934	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9935	pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
9936	static_assert_uimm_bits!(IMM8, `8`);
9937	_mm256_mask_roundscale_ph::<IMM8>(src:_mm256_setzero_ph(), k, a)
9938	}
9939
9940	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9941	/// specified by imm8, and store the results in dst.
9942	///
9943	/// Rounding is done according to the imm8 parameter, which can be one of:
9944	///
9945	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9946	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9947	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9948	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9949	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9950	///
9951	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
9952	#[inline]
9953	#[target_feature(enable = "avx512fp16")]
9954	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9955	#[rustc_legacy_const_generics(`1`)]
9956	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9957	pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
9958	static_assert_uimm_bits!(IMM8, `8`);
9959	_mm512_mask_roundscale_ph::<IMM8>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
9960	}
9961
9962	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9963	/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9964	/// the corresponding mask bit is not set).
9965	///
9966	/// Rounding is done according to the imm8 parameter, which can be one of:
9967	///
9968	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9969	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9970	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9971	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9972	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9973	///
9974	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
9975	#[inline]
9976	#[target_feature(enable = "avx512fp16")]
9977	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
9978	#[rustc_legacy_const_generics(`3`)]
9979	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9980	pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
9981	src: __m512h,
9982	k: __mmask32,
9983	a: __m512h,
9984	) -> __m512h {
9985	static_assert_uimm_bits!(IMM8, `8`);
9986	_mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
9987	}
9988
9989	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9990	/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9991	/// mask bit is not set).
9992	///
9993	/// Rounding is done according to the imm8 parameter, which can be one of:
9994	///
9995	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9996	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9997	/// * [`_MM_FROUND_TO_POS_INF`] : round up
9998	/// * [`_MM_FROUND_TO_ZERO`] : truncate
9999	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10000	///
10001	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
10002	#[inline]
10003	#[target_feature(enable = "avx512fp16")]
10004	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`))]
10005	#[rustc_legacy_const_generics(`2`)]
10006	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10007	pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10008	static_assert_uimm_bits!(IMM8, `8`);
10009	_mm512_mask_roundscale_ph::<IMM8>(src:_mm512_setzero_ph(), k, a)
10010	}
10011
10012	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10013	/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
10014	/// in the sae parameter
10015	///
10016	/// Rounding is done according to the imm8 parameter, which can be one of:
10017	///
10018	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10019	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10020	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10021	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10022	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10023	///
10024	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
10025	#[inline]
10026	#[target_feature(enable = "avx512fp16")]
10027	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`, SAE = `8`))]
10028	#[rustc_legacy_const_generics(`1`, `2`)]
10029	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10030	pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10031	static_assert_uimm_bits!(IMM8, `8`);
10032	static_assert_sae!(SAE);
10033	_mm512_mask_roundscale_round_ph::<IMM8, SAE>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
10034	}
10035
10036	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10037	/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10038	/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
10039	/// in the sae parameter
10040	///
10041	/// Rounding is done according to the imm8 parameter, which can be one of:
10042	///
10043	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10044	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10045	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10046	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10047	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10048	///
10049	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
10050	#[inline]
10051	#[target_feature(enable = "avx512fp16")]
10052	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`, SAE = `8`))]
10053	#[rustc_legacy_const_generics(`3`, `4`)]
10054	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10055	pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10056	src: __m512h,
10057	k: __mmask32,
10058	a: __m512h,
10059	) -> __m512h {
10060	unsafe {
10061	static_assert_uimm_bits!(IMM8, `8`);
10062	static_assert_sae!(SAE);
10063	vrndscaleph_512(a, IMM8, src, k, SAE)
10064	}
10065	}
10066
10067	/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10068	/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10069	/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10070	///
10071	/// Rounding is done according to the imm8 parameter, which can be one of:
10072	///
10073	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10074	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10075	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10076	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10077	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10078	///
10079	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
10080	#[inline]
10081	#[target_feature(enable = "avx512fp16")]
10082	#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = `0`, SAE = `8`))]
10083	#[rustc_legacy_const_generics(`2`, `3`)]
10084	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10085	pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10086	k: __mmask32,
10087	a: __m512h,
10088	) -> __m512h {
10089	static_assert_uimm_bits!(IMM8, `8`);
10090	static_assert_sae!(SAE);
10091	_mm512_mask_roundscale_round_ph::<IMM8, SAE>(src:_mm512_setzero_ph(), k, a)
10092	}
10093
10094	/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10095	/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10096	/// from a to the upper elements of dst.
10097	///
10098	/// Rounding is done according to the imm8 parameter, which can be one of:
10099	///
10100	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105	///
10106	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
10107	#[inline]
10108	#[target_feature(enable = "avx512fp16")]
10109	#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = `0`))]
10110	#[rustc_legacy_const_generics(`2`)]
10111	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10112	pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10113	static_assert_uimm_bits!(IMM8, `8`);
10114	_mm_mask_roundscale_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
10115	}
10116
10117	/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10118	/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10119	/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10120	///
10121	/// Rounding is done according to the imm8 parameter, which can be one of:
10122	///
10123	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10124	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10125	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10126	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10127	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10128	///
10129	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
10130	#[inline]
10131	#[target_feature(enable = "avx512fp16")]
10132	#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = `0`))]
10133	#[rustc_legacy_const_generics(`4`)]
10134	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10135	pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
10136	src: __m128h,
10137	k: __mmask8,
10138	a: __m128h,
10139	b: __m128h,
10140	) -> __m128h {
10141	static_assert_uimm_bits!(IMM8, `8`);
10142	_mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10143	}
10144
10145	/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10146	/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10147	/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10148	///
10149	/// Rounding is done according to the imm8 parameter, which can be one of:
10150	///
10151	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10152	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10153	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10154	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10155	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10156	///
10157	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
10158	#[inline]
10159	#[target_feature(enable = "avx512fp16")]
10160	#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = `0`))]
10161	#[rustc_legacy_const_generics(`3`)]
10162	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10163	pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10164	static_assert_uimm_bits!(IMM8, `8`);
10165	_mm_mask_roundscale_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k, a, b)
10166	}
10167
10168	/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10169	/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10170	/// from a to the upper elements of dst.
10171	///
10172	/// Rounding is done according to the imm8 parameter, which can be one of:
10173	///
10174	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10175	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10176	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10177	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10178	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10179	///
10180	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10181	///
10182	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
10183	#[inline]
10184	#[target_feature(enable = "avx512fp16")]
10185	#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = `0`, SAE = `8`))]
10186	#[rustc_legacy_const_generics(`2`, `3`)]
10187	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10188	pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
10189	static_assert_uimm_bits!(IMM8, `8`);
10190	static_assert_sae!(SAE);
10191	_mm_mask_roundscale_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
10192	}
10193
10194	/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10195	/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10196	/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10197	///
10198	/// Rounding is done according to the imm8 parameter, which can be one of:
10199	///
10200	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10201	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10202	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10203	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10204	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10205	///
10206	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10207	///
10208	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
10209	#[inline]
10210	#[target_feature(enable = "avx512fp16")]
10211	#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = `0`, SAE = `8`))]
10212	#[rustc_legacy_const_generics(`4`, `5`)]
10213	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10214	pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10215	src: __m128h,
10216	k: __mmask8,
10217	a: __m128h,
10218	b: __m128h,
10219	) -> __m128h {
10220	unsafe {
10221	static_assert_uimm_bits!(IMM8, `8`);
10222	static_assert_sae!(SAE);
10223	vrndscalesh(a, b, src, k, IMM8, SAE)
10224	}
10225	}
10226
10227	/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10228	/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10229	/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10230	///
10231	/// Rounding is done according to the imm8 parameter, which can be one of:
10232	///
10233	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10234	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10235	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10236	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10237	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10238	///
10239	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10240	///
10241	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
10242	#[inline]
10243	#[target_feature(enable = "avx512fp16")]
10244	#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = `0`, SAE = `8`))]
10245	#[rustc_legacy_const_generics(`3`, `4`)]
10246	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10247	pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10248	k: __mmask8,
10249	a: __m128h,
10250	b: __m128h,
10251	) -> __m128h {
10252	static_assert_uimm_bits!(IMM8, `8`);
10253	static_assert_sae!(SAE);
10254	_mm_mask_roundscale_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
10255	}
10256
10257	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10258	/// the results in dst.
10259	///
10260	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
10261	#[inline]
10262	#[target_feature(enable = "avx512fp16,avx512vl")]
10263	#[cfg_attr(test, assert_instr(vscalefph))]
10264	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10265	pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
10266	_mm_mask_scalef_ph(src:_mm_undefined_ph(), k:`0xff`, a, b)
10267	}
10268
10269	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10270	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10271	///
10272	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
10273	#[inline]
10274	#[target_feature(enable = "avx512fp16,avx512vl")]
10275	#[cfg_attr(test, assert_instr(vscalefph))]
10276	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10277	pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10278	unsafe { vscalefph_128(a, b, src, k) }
10279	}
10280
10281	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10282	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10283	///
10284	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
10285	#[inline]
10286	#[target_feature(enable = "avx512fp16,avx512vl")]
10287	#[cfg_attr(test, assert_instr(vscalefph))]
10288	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10289	pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10290	_mm_mask_scalef_ph(src:_mm_setzero_ph(), k, a, b)
10291	}
10292
10293	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10294	/// the results in dst.
10295	///
10296	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
10297	#[inline]
10298	#[target_feature(enable = "avx512fp16,avx512vl")]
10299	#[cfg_attr(test, assert_instr(vscalefph))]
10300	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10301	pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
10302	_mm256_mask_scalef_ph(src:_mm256_undefined_ph(), k:`0xffff`, a, b)
10303	}
10304
10305	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10306	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10307	///
10308	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
10309	#[inline]
10310	#[target_feature(enable = "avx512fp16,avx512vl")]
10311	#[cfg_attr(test, assert_instr(vscalefph))]
10312	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10313	pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
10314	unsafe { vscalefph_256(a, b, src, k) }
10315	}
10316
10317	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10318	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10319	///
10320	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
10321	#[inline]
10322	#[target_feature(enable = "avx512fp16,avx512vl")]
10323	#[cfg_attr(test, assert_instr(vscalefph))]
10324	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10325	pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
10326	_mm256_mask_scalef_ph(src:_mm256_setzero_ph(), k, a, b)
10327	}
10328
10329	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10330	/// the results in dst.
10331	///
10332	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
10333	#[inline]
10334	#[target_feature(enable = "avx512fp16")]
10335	#[cfg_attr(test, assert_instr(vscalefph))]
10336	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10337	pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
10338	_mm512_mask_scalef_ph(src:_mm512_undefined_ph(), k:`0xffffffff`, a, b)
10339	}
10340
10341	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10342	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10343	///
10344	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
10345	#[inline]
10346	#[target_feature(enable = "avx512fp16")]
10347	#[cfg_attr(test, assert_instr(vscalefph))]
10348	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10349	pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
10350	_mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10351	}
10352
10353	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10354	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10355	///
10356	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
10357	#[inline]
10358	#[target_feature(enable = "avx512fp16")]
10359	#[cfg_attr(test, assert_instr(vscalefph))]
10360	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10361	pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
10362	_mm512_mask_scalef_ph(src:_mm512_setzero_ph(), k, a, b)
10363	}
10364
10365	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10366	/// the results in dst.
10367	///
10368	/// Rounding is done according to the rounding parameter, which can be one of:
10369	///
10370	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10371	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10372	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10373	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10374	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10375	///
10376	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
10377	#[inline]
10378	#[target_feature(enable = "avx512fp16")]
10379	#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = `8`))]
10380	#[rustc_legacy_const_generics(`2`)]
10381	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10382	pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
10383	static_assert_rounding!(ROUNDING);
10384	_mm512_mask_scalef_round_ph::<ROUNDING>(src:_mm512_undefined_ph(), k:`0xffffffff`, a, b)
10385	}
10386
10387	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10388	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10389	///
10390	/// Rounding is done according to the rounding parameter, which can be one of:
10391	///
10392	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10393	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10394	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10395	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10396	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10397	///
10398	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
10399	#[inline]
10400	#[target_feature(enable = "avx512fp16")]
10401	#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = `8`))]
10402	#[rustc_legacy_const_generics(`4`)]
10403	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10404	pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
10405	src: __m512h,
10406	k: __mmask32,
10407	a: __m512h,
10408	b: __m512h,
10409	) -> __m512h {
10410	unsafe {
10411	static_assert_rounding!(ROUNDING);
10412	vscalefph_512(a, b, src, k, ROUNDING)
10413	}
10414	}
10415
10416	/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10417	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10418	///
10419	/// Rounding is done according to the rounding parameter, which can be one of:
10420	///
10421	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10422	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10423	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10424	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10425	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10426	///
10427	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
10428	#[inline]
10429	#[target_feature(enable = "avx512fp16")]
10430	#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = `8`))]
10431	#[rustc_legacy_const_generics(`3`)]
10432	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10433	pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
10434	k: __mmask32,
10435	a: __m512h,
10436	b: __m512h,
10437	) -> __m512h {
10438	static_assert_rounding!(ROUNDING);
10439	_mm512_mask_scalef_round_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
10440	}
10441
10442	/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10443	/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10444	/// elements of dst.
10445	///
10446	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
10447	#[inline]
10448	#[target_feature(enable = "avx512fp16")]
10449	#[cfg_attr(test, assert_instr(vscalefsh))]
10450	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10451	pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
10452	_mm_mask_scalef_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
10453	}
10454
10455	/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10456	/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10457	/// and copy the upper 7 packed elements from a to the upper elements of dst.
10458	///
10459	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
10460	#[inline]
10461	#[target_feature(enable = "avx512fp16")]
10462	#[cfg_attr(test, assert_instr(vscalefsh))]
10463	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10464	pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10465	_mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10466	}
10467
10468	/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10469	/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10470	/// and copy the upper 7 packed elements from a to the upper elements of dst.
10471	///
10472	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
10473	#[inline]
10474	#[target_feature(enable = "avx512fp16")]
10475	#[cfg_attr(test, assert_instr(vscalefsh))]
10476	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10477	pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10478	_mm_mask_scalef_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
10479	}
10480
10481	/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10482	/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10483	/// elements of dst.
10484	///
10485	/// Rounding is done according to the rounding parameter, which can be one of:
10486	///
10487	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10488	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10489	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10490	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10491	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10492	///
10493	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
10494	#[inline]
10495	#[target_feature(enable = "avx512fp16")]
10496	#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = `8`))]
10497	#[rustc_legacy_const_generics(`2`)]
10498	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10499	pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
10500	static_assert_rounding!(ROUNDING);
10501	_mm_mask_scalef_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
10502	}
10503
10504	/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10505	/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10506	/// and copy the upper 7 packed elements from a to the upper elements of dst.
10507	///
10508	/// Rounding is done according to the rounding parameter, which can be one of:
10509	///
10510	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10511	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10512	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10513	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10514	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10515	///
10516	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
10517	#[inline]
10518	#[target_feature(enable = "avx512fp16")]
10519	#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = `8`))]
10520	#[rustc_legacy_const_generics(`4`)]
10521	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10522	pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
10523	src: __m128h,
10524	k: __mmask8,
10525	a: __m128h,
10526	b: __m128h,
10527	) -> __m128h {
10528	unsafe {
10529	static_assert_rounding!(ROUNDING);
10530	vscalefsh(a, b, src, k, ROUNDING)
10531	}
10532	}
10533
10534	/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10535	/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10536	/// and copy the upper 7 packed elements from a to the upper elements of dst.
10537	///
10538	/// Rounding is done according to the rounding parameter, which can be one of:
10539	///
10540	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10541	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10542	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10543	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10544	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10545	///
10546	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
10547	#[inline]
10548	#[target_feature(enable = "avx512fp16")]
10549	#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = `8`))]
10550	#[rustc_legacy_const_generics(`3`)]
10551	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10552	pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
10553	k: __mmask8,
10554	a: __m128h,
10555	b: __m128h,
10556	) -> __m128h {
10557	static_assert_rounding!(ROUNDING);
10558	_mm_mask_scalef_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
10559	}
10560
10561	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10562	/// number of bits specified by imm8, and store the results in dst.
10563	///
10564	/// Rounding is done according to the imm8 parameter, which can be one of:
10565	///
10566	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10567	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10568	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10569	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10570	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10571	///
10572	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
10573	#[inline]
10574	#[target_feature(enable = "avx512fp16,avx512vl")]
10575	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10576	#[rustc_legacy_const_generics(`1`)]
10577	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10578	pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
10579	static_assert_uimm_bits!(IMM8, `8`);
10580	_mm_mask_reduce_ph::<IMM8>(src:_mm_undefined_ph(), k:`0xff`, a)
10581	}
10582
10583	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10584	/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10585	/// from src when the corresponding mask bit is not set).
10586	///
10587	/// Rounding is done according to the imm8 parameter, which can be one of:
10588	///
10589	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10590	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10591	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10592	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10593	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10594	///
10595	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
10596	#[inline]
10597	#[target_feature(enable = "avx512fp16,avx512vl")]
10598	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10599	#[rustc_legacy_const_generics(`3`)]
10600	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10601	pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
10602	unsafe {
10603	static_assert_uimm_bits!(IMM8, `8`);
10604	vreduceph_128(a, IMM8, src, k)
10605	}
10606	}
10607
10608	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10609	/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10610	/// out when the corresponding mask bit is not set).
10611	///
10612	/// Rounding is done according to the imm8 parameter, which can be one of:
10613	///
10614	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10615	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10616	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10617	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10618	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10619	///
10620	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
10621	#[inline]
10622	#[target_feature(enable = "avx512fp16,avx512vl")]
10623	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10624	#[rustc_legacy_const_generics(`2`)]
10625	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10626	pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
10627	static_assert_uimm_bits!(IMM8, `8`);
10628	_mm_mask_reduce_ph::<IMM8>(src:_mm_setzero_ph(), k, a)
10629	}
10630
10631	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10632	/// number of bits specified by imm8, and store the results in dst.
10633	///
10634	/// Rounding is done according to the imm8 parameter, which can be one of:
10635	///
10636	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10637	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10638	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10639	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10640	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10641	///
10642	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
10643	#[inline]
10644	#[target_feature(enable = "avx512fp16,avx512vl")]
10645	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10646	#[rustc_legacy_const_generics(`1`)]
10647	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10648	pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
10649	static_assert_uimm_bits!(IMM8, `8`);
10650	_mm256_mask_reduce_ph::<IMM8>(src:_mm256_undefined_ph(), k:`0xffff`, a)
10651	}
10652
10653	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10654	/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10655	/// from src when the corresponding mask bit is not set).
10656	///
10657	/// Rounding is done according to the imm8 parameter, which can be one of:
10658	///
10659	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10660	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10661	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10662	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10663	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10664	///
10665	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
10666	#[inline]
10667	#[target_feature(enable = "avx512fp16,avx512vl")]
10668	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10669	#[rustc_legacy_const_generics(`3`)]
10670	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10671	pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
10672	unsafe {
10673	static_assert_uimm_bits!(IMM8, `8`);
10674	vreduceph_256(a, IMM8, src, k)
10675	}
10676	}
10677
10678	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10679	/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10680	/// out when the corresponding mask bit is not set).
10681	///
10682	/// Rounding is done according to the imm8 parameter, which can be one of:
10683	///
10684	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10685	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10686	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10687	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10688	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10689	///
10690	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
10691	#[inline]
10692	#[target_feature(enable = "avx512fp16,avx512vl")]
10693	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10694	#[rustc_legacy_const_generics(`2`)]
10695	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10696	pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10697	static_assert_uimm_bits!(IMM8, `8`);
10698	_mm256_mask_reduce_ph::<IMM8>(src:_mm256_setzero_ph(), k, a)
10699	}
10700
10701	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10702	/// number of bits specified by imm8, and store the results in dst.
10703	///
10704	/// Rounding is done according to the imm8 parameter, which can be one of:
10705	///
10706	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10707	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10708	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10709	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10710	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10711	///
10712	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
10713	#[inline]
10714	#[target_feature(enable = "avx512fp16")]
10715	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10716	#[rustc_legacy_const_generics(`1`)]
10717	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10718	pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
10719	static_assert_uimm_bits!(IMM8, `8`);
10720	_mm512_mask_reduce_ph::<IMM8>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
10721	}
10722
10723	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10724	/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10725	/// from src when the corresponding mask bit is not set).
10726	///
10727	/// Rounding is done according to the imm8 parameter, which can be one of:
10728	///
10729	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10730	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10731	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10732	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10733	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10734	///
10735	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
10736	#[inline]
10737	#[target_feature(enable = "avx512fp16")]
10738	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10739	#[rustc_legacy_const_generics(`3`)]
10740	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10741	pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
10742	static_assert_uimm_bits!(IMM8, `8`);
10743	_mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
10744	}
10745
10746	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10747	/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10748	/// out when the corresponding mask bit is not set).
10749	///
10750	/// Rounding is done according to the imm8 parameter, which can be one of:
10751	///
10752	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10753	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10754	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10755	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10756	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10757	///
10758	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
10759	#[inline]
10760	#[target_feature(enable = "avx512fp16")]
10761	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`))]
10762	#[rustc_legacy_const_generics(`2`)]
10763	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10764	pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10765	static_assert_uimm_bits!(IMM8, `8`);
10766	_mm512_mask_reduce_ph::<IMM8>(src:_mm512_setzero_ph(), k, a)
10767	}
10768
10769	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10770	/// number of bits specified by imm8, and store the results in dst.
10771	///
10772	/// Rounding is done according to the imm8 parameter, which can be one of:
10773	///
10774	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10775	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10776	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10777	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10778	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10779	///
10780	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781	///
10782	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
10783	#[inline]
10784	#[target_feature(enable = "avx512fp16")]
10785	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`, SAE = `8`))]
10786	#[rustc_legacy_const_generics(`1`, `2`)]
10787	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10788	pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10789	static_assert_uimm_bits!(IMM8, `8`);
10790	static_assert_sae!(SAE);
10791	_mm512_mask_reduce_round_ph::<IMM8, SAE>(src:_mm512_undefined_ph(), k:`0xffffffff`, a)
10792	}
10793
10794	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10795	/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10796	/// from src when the corresponding mask bit is not set).
10797	///
10798	/// Rounding is done according to the imm8 parameter, which can be one of:
10799	///
10800	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10801	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10802	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10803	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10804	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10805	///
10806	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10807	///
10808	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
10809	#[inline]
10810	#[target_feature(enable = "avx512fp16")]
10811	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`, SAE = `8`))]
10812	#[rustc_legacy_const_generics(`3`, `4`)]
10813	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10814	pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10815	src: __m512h,
10816	k: __mmask32,
10817	a: __m512h,
10818	) -> __m512h {
10819	unsafe {
10820	static_assert_uimm_bits!(IMM8, `8`);
10821	static_assert_sae!(SAE);
10822	vreduceph_512(a, IMM8, src, k, SAE)
10823	}
10824	}
10825
10826	/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10827	/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10828	/// out when the corresponding mask bit is not set).
10829	///
10830	/// Rounding is done according to the imm8 parameter, which can be one of:
10831	///
10832	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10833	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10834	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10835	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10836	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10837	///
10838	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10839	///
10840	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
10841	#[inline]
10842	#[target_feature(enable = "avx512fp16")]
10843	#[cfg_attr(test, assert_instr(vreduceph, IMM8 = `0`, SAE = `8`))]
10844	#[rustc_legacy_const_generics(`2`, `3`)]
10845	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10846	pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10847	k: __mmask32,
10848	a: __m512h,
10849	) -> __m512h {
10850	static_assert_uimm_bits!(IMM8, `8`);
10851	static_assert_sae!(SAE);
10852	_mm512_mask_reduce_round_ph::<IMM8, SAE>(src:_mm512_setzero_ph(), k, a)
10853	}
10854
10855	/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10856	/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
10857	/// upper 7 packed elements from a to the upper elements of dst.
10858	///
10859	/// Rounding is done according to the imm8 parameter, which can be one of:
10860	///
10861	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10862	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10863	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10864	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10865	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10866	///
10867	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
10868	#[inline]
10869	#[target_feature(enable = "avx512fp16")]
10870	#[cfg_attr(test, assert_instr(vreducesh, IMM8 = `0`))]
10871	#[rustc_legacy_const_generics(`2`)]
10872	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10873	pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10874	static_assert_uimm_bits!(IMM8, `8`);
10875	_mm_mask_reduce_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
10876	}
10877
10878	/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10879	/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
10880	/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
10881	/// a to the upper elements of dst.
10882	///
10883	/// Rounding is done according to the imm8 parameter, which can be one of:
10884	///
10885	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10886	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10887	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10888	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10889	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10890	///
10891	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
10892	#[inline]
10893	#[target_feature(enable = "avx512fp16")]
10894	#[cfg_attr(test, assert_instr(vreducesh, IMM8 = `0`))]
10895	#[rustc_legacy_const_generics(`4`)]
10896	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10897	pub fn _mm_mask_reduce_sh<const IMM8: i32>(
10898	src: __m128h,
10899	k: __mmask8,
10900	a: __m128h,
10901	b: __m128h,
10902	) -> __m128h {
10903	static_assert_uimm_bits!(IMM8, `8`);
10904	_mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10905	}
10906
10907	/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10908	/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
10909	/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
10910	/// to the upper elements of dst.
10911	///
10912	/// Rounding is done according to the imm8 parameter, which can be one of:
10913	///
10914	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10915	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10916	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10917	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10918	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10919	///
10920	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
10921	#[inline]
10922	#[target_feature(enable = "avx512fp16")]
10923	#[cfg_attr(test, assert_instr(vreducesh, IMM8 = `0`))]
10924	#[rustc_legacy_const_generics(`3`)]
10925	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10926	pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10927	static_assert_uimm_bits!(IMM8, `8`);
10928	_mm_mask_reduce_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k, a, b)
10929	}
10930
10931	/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10932	/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
10933	/// 7 packed elements from a to the upper elements of dst.
10934	///
10935	/// Rounding is done according to the imm8 parameter, which can be one of:
10936	///
10937	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10938	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10939	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10940	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10941	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10942	///
10943	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10944	///
10945	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
10946	#[inline]
10947	#[target_feature(enable = "avx512fp16")]
10948	#[cfg_attr(test, assert_instr(vreducesh, IMM8 = `0`, SAE = `8`))]
10949	#[rustc_legacy_const_generics(`2`, `3`)]
10950	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10951	pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
10952	static_assert_uimm_bits!(IMM8, `8`);
10953	static_assert_sae!(SAE);
10954	_mm_mask_reduce_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
10955	}
10956
10957	/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10958	/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
10959	/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
10960	/// to the upper elements of dst.
10961	///
10962	/// Rounding is done according to the imm8 parameter, which can be one of:
10963	///
10964	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10965	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10966	/// * [`_MM_FROUND_TO_POS_INF`] : round up
10967	/// * [`_MM_FROUND_TO_ZERO`] : truncate
10968	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10969	///
10970	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10971	///
10972	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
10973	#[inline]
10974	#[target_feature(enable = "avx512fp16")]
10975	#[cfg_attr(test, assert_instr(vreducesh, IMM8 = `0`, SAE = `8`))]
10976	#[rustc_legacy_const_generics(`4`, `5`)]
10977	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10978	pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
10979	src: __m128h,
10980	k: __mmask8,
10981	a: __m128h,
10982	b: __m128h,
10983	) -> __m128h {
10984	unsafe {
10985	static_assert_uimm_bits!(IMM8, `8`);
10986	static_assert_sae!(SAE);
10987	vreducesh(a, b, src, k, IMM8, SAE)
10988	}
10989	}
10990
10991	/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10992	/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
10993	/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
10994	/// to the upper elements of dst.
10995	///
10996	/// Rounding is done according to the imm8 parameter, which can be one of:
10997	///
10998	/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10999	/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11000	/// * [`_MM_FROUND_TO_POS_INF`] : round up
11001	/// * [`_MM_FROUND_TO_ZERO`] : truncate
11002	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11003	///
11004	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11005	///
11006	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
11007	#[inline]
11008	#[target_feature(enable = "avx512fp16")]
11009	#[cfg_attr(test, assert_instr(vreducesh, IMM8 = `0`, SAE = `8`))]
11010	#[rustc_legacy_const_generics(`3`, `4`)]
11011	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11012	pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
11013	k: __mmask8,
11014	a: __m128h,
11015	b: __m128h,
11016	) -> __m128h {
11017	static_assert_uimm_bits!(IMM8, `8`);
11018	static_assert_sae!(SAE);
11019	_mm_mask_reduce_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
11020	}
11021
11022	/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11023	/// sum of all elements in a.
11024	///
11025	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
11026	#[inline]
11027	#[target_feature(enable = "avx512fp16,avx512vl")]
11028	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11029	pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
11030	unsafe {
11031	let b: __m128h = simd_shuffle!(a, a, [`4`, `5`, `6`, `7`, `0`, `1`, `2`, `3`]);
11032	let a: __m128h = _mm_add_ph(a, b);
11033	let b: __m128h = simd_shuffle!(a, a, [`2`, `3`, `0`, `1`, `4`, `5`, `6`, `7`]);
11034	let a: __m128h = _mm_add_ph(a, b);
11035	simd_extract::<_, f16>(x:a, idx:`0`) + simd_extract::<_, f16>(x:a, idx:`1`)
11036	}
11037	}
11038
11039	/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11040	/// sum of all elements in a.
11041	///
11042	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
11043	#[inline]
11044	#[target_feature(enable = "avx512fp16,avx512vl")]
11045	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11046	pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
11047	unsafe {
11048	let p: __m128h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
11049	let q: __m128h = simd_shuffle!(a, a, [`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11050	_mm_reduce_add_ph(_mm_add_ph(a:p, b:q))
11051	}
11052	}
11053
11054	/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11055	/// sum of all elements in a.
11056	///
11057	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
11058	#[inline]
11059	#[target_feature(enable = "avx512fp16")]
11060	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11061	pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
11062	unsafe {
11063	let p: __m256h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11064	let q: __m256h = simd_shuffle!(
11065	a,
11066	a,
11067	[
11068	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`
11069	]
11070	);
11071	_mm256_reduce_add_ph(_mm256_add_ph(a:p, b:q))
11072	}
11073	}
11074
11075	/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11076	/// the product of all elements in a.
11077	///
11078	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
11079	#[inline]
11080	#[target_feature(enable = "avx512fp16,avx512vl")]
11081	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11082	pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
11083	unsafe {
11084	let b: __m128h = simd_shuffle!(a, a, [`4`, `5`, `6`, `7`, `0`, `1`, `2`, `3`]);
11085	let a: __m128h = _mm_mul_ph(a, b);
11086	let b: __m128h = simd_shuffle!(a, a, [`2`, `3`, `0`, `1`, `4`, `5`, `6`, `7`]);
11087	let a: __m128h = _mm_mul_ph(a, b);
11088	simd_extract::<_, f16>(x:a, idx:`0`) * simd_extract::<_, f16>(x:a, idx:`1`)
11089	}
11090	}
11091
11092	/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11093	/// the product of all elements in a.
11094	///
11095	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
11096	#[inline]
11097	#[target_feature(enable = "avx512fp16,avx512vl")]
11098	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11099	pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
11100	unsafe {
11101	let p: __m128h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
11102	let q: __m128h = simd_shuffle!(a, a, [`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11103	_mm_reduce_mul_ph(_mm_mul_ph(a:p, b:q))
11104	}
11105	}
11106
11107	/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11108	/// the product of all elements in a.
11109	///
11110	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
11111	#[inline]
11112	#[target_feature(enable = "avx512fp16")]
11113	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11114	pub unsafe fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
11115	unsafe {
11116	let p: __m256h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11117	let q: __m256h = simd_shuffle!(
11118	a,
11119	a,
11120	[
11121	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`
11122	]
11123	);
11124	_mm256_reduce_mul_ph(_mm256_mul_ph(a:p, b:q))
11125	}
11126	}
11127
11128	/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11129	/// minimum of all elements in a.
11130	///
11131	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
11132	#[inline]
11133	#[target_feature(enable = "avx512fp16,avx512vl")]
11134	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11135	pub fn _mm_reduce_min_ph(a: __m128h) -> f16 {
11136	unsafe {
11137	let b: __m128h = simd_shuffle!(a, a, [`4`, `5`, `6`, `7`, `0`, `1`, `2`, `3`]);
11138	let a: __m128h = _mm_min_ph(a, b);
11139	let b: __m128h = simd_shuffle!(a, a, [`2`, `3`, `0`, `1`, `4`, `5`, `6`, `7`]);
11140	let a: __m128h = _mm_min_ph(a, b);
11141	let b: __m128h = simd_shuffle!(a, a, [`1`, `0`, `2`, `3`, `4`, `5`, `6`, `7`]);
11142	simd_extract!(_mm_min_sh(a, b), `0`)
11143	}
11144	}
11145
11146	/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11147	/// minimum of all elements in a.
11148	///
11149	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
11150	#[inline]
11151	#[target_feature(enable = "avx512fp16,avx512vl")]
11152	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11153	pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
11154	unsafe {
11155	let p: __m128h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
11156	let q: __m128h = simd_shuffle!(a, a, [`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11157	_mm_reduce_min_ph(_mm_min_ph(a:p, b:q))
11158	}
11159	}
11160
11161	/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11162	/// minimum of all elements in a.
11163	///
11164	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
11165	#[inline]
11166	#[target_feature(enable = "avx512fp16")]
11167	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11168	pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
11169	unsafe {
11170	let p: __m256h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11171	let q: __m256h = simd_shuffle!(
11172	a,
11173	a,
11174	[
11175	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`
11176	]
11177	);
11178	_mm256_reduce_min_ph(_mm256_min_ph(a:p, b:q))
11179	}
11180	}
11181
11182	/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11183	/// maximum of all elements in a.
11184	///
11185	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
11186	#[inline]
11187	#[target_feature(enable = "avx512fp16,avx512vl")]
11188	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11189	pub fn _mm_reduce_max_ph(a: __m128h) -> f16 {
11190	unsafe {
11191	let b: __m128h = simd_shuffle!(a, a, [`4`, `5`, `6`, `7`, `0`, `1`, `2`, `3`]);
11192	let a: __m128h = _mm_max_ph(a, b);
11193	let b: __m128h = simd_shuffle!(a, a, [`2`, `3`, `0`, `1`, `4`, `5`, `6`, `7`]);
11194	let a: __m128h = _mm_max_ph(a, b);
11195	let b: __m128h = simd_shuffle!(a, a, [`1`, `0`, `2`, `3`, `4`, `5`, `6`, `7`]);
11196	simd_extract!(_mm_max_sh(a, b), `0`)
11197	}
11198	}
11199
11200	/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11201	/// maximum of all elements in a.
11202	///
11203	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
11204	#[inline]
11205	#[target_feature(enable = "avx512fp16,avx512vl")]
11206	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11207	pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
11208	unsafe {
11209	let p: __m128h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`]);
11210	let q: __m128h = simd_shuffle!(a, a, [`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11211	_mm_reduce_max_ph(_mm_max_ph(a:p, b:q))
11212	}
11213	}
11214
11215	/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11216	/// maximum of all elements in a.
11217	///
11218	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
11219	#[inline]
11220	#[target_feature(enable = "avx512fp16")]
11221	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11222	pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
11223	unsafe {
11224	let p: __m256h = simd_shuffle!(a, a, [`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`]);
11225	let q: __m256h = simd_shuffle!(
11226	a,
11227	a,
11228	[
11229	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`
11230	]
11231	);
11232	_mm256_reduce_max_ph(_mm256_max_ph(a:p, b:q))
11233	}
11234	}
11235
// Emits a `vfpclassph` instruction through inline assembly and evaluates to the mask it produces.
//
// Two invocation forms are accepted:
// * `fpclass_asm!(mask_type, reg_class, a)` - classify `a` with no mask operand.
// * `fpclass_asm!(mask_type, mask, reg_class, a)` - classify `a` under the mask register `mask`.
//
// A const named `IMM8` must be in scope at the expansion site, and because `asm!` is an unsafe
// operation the macro has to be expanded inside an unsafe context.
macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
    ($mask_ty: ty, $reg_class: ident, $vector: expr) => {{
        let class_mask: $mask_ty;
        asm!(
            "vfpclassph {k}, {src}, {imm8}",
            k = lateout(kreg) class_mask,
            src = in($reg_class) $vector,
            imm8 = const IMM8,
            options(pure, nomem, nostack)
        );
        class_mask
    }};
    ($mask_ty: ty, $write_mask: expr, $reg_class: ident, $vector: expr) => {{
        let class_mask: $mask_ty;
        asm!(
            "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
            k = lateout(kreg) class_mask,
            mask = in(kreg) $write_mask,
            src = in($reg_class) $vector,
            imm8 = const IMM8,
            options(pure, nomem, nostack)
        );
        class_mask
    }};
}
11261
11262	/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11263	/// by imm8, and store the results in mask vector k.
11264	/// imm can be a combination of:
11265	///
11266	/// 0x01 // QNaN
11267	/// 0x02 // Positive Zero
11268	/// 0x04 // Negative Zero
11269	/// 0x08 // Positive Infinity
11270	/// 0x10 // Negative Infinity
11271	/// 0x20 // Denormal
11272	/// 0x40 // Negative
11273	/// 0x80 // SNaN
11274	///
11275	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
11276	#[inline]
11277	#[target_feature(enable = "avx512fp16,avx512vl")]
11278	#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = `0`))]
11279	#[rustc_legacy_const_generics(`1`)]
11280	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11281	pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
11282	unsafe {
11283	static_assert_uimm_bits!(IMM8, `8`);
11284	fpclass_asm!(__mmask8, xmm_reg, a)
11285	}
11286	}
11287
11288	/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11289	/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11290	/// corresponding mask bit is not set).
11291	/// imm can be a combination of:
11292	///
11293	/// 0x01 // QNaN
11294	/// 0x02 // Positive Zero
11295	/// 0x04 // Negative Zero
11296	/// 0x08 // Positive Infinity
11297	/// 0x10 // Negative Infinity
11298	/// 0x20 // Denormal
11299	/// 0x40 // Negative
11300	/// 0x80 // SNaN
11301	///
11302	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
11303	#[inline]
11304	#[target_feature(enable = "avx512fp16,avx512vl")]
11305	#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = `0`))]
11306	#[rustc_legacy_const_generics(`2`)]
11307	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11308	pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11309	unsafe {
11310	static_assert_uimm_bits!(IMM8, `8`);
11311	fpclass_asm!(__mmask8, k1, xmm_reg, a)
11312	}
11313	}
11314
11315	/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11316	/// by imm8, and store the results in mask vector k.
11317	/// imm can be a combination of:
11318	///
11319	/// 0x01 // QNaN
11320	/// 0x02 // Positive Zero
11321	/// 0x04 // Negative Zero
11322	/// 0x08 // Positive Infinity
11323	/// 0x10 // Negative Infinity
11324	/// 0x20 // Denormal
11325	/// 0x40 // Negative
11326	/// 0x80 // SNaN
11327	///
11328	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
11329	#[inline]
11330	#[target_feature(enable = "avx512fp16,avx512vl")]
11331	#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = `0`))]
11332	#[rustc_legacy_const_generics(`1`)]
11333	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11334	pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
11335	unsafe {
11336	static_assert_uimm_bits!(IMM8, `8`);
11337	fpclass_asm!(__mmask16, ymm_reg, a)
11338	}
11339	}
11340
11341	/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11342	/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11343	/// corresponding mask bit is not set).
11344	/// imm can be a combination of:
11345	///
11346	/// 0x01 // QNaN
11347	/// 0x02 // Positive Zero
11348	/// 0x04 // Negative Zero
11349	/// 0x08 // Positive Infinity
11350	/// 0x10 // Negative Infinity
11351	/// 0x20 // Denormal
11352	/// 0x40 // Negative
11353	/// 0x80 // SNaN
11354	///
11355	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
11356	#[inline]
11357	#[target_feature(enable = "avx512fp16,avx512vl")]
11358	#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = `0`))]
11359	#[rustc_legacy_const_generics(`2`)]
11360	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11361	pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
11362	unsafe {
11363	static_assert_uimm_bits!(IMM8, `8`);
11364	fpclass_asm!(__mmask16, k1, ymm_reg, a)
11365	}
11366	}
11367
11368	/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11369	/// by imm8, and store the results in mask vector k.
11370	/// imm can be a combination of:
11371	///
11372	/// 0x01 // QNaN
11373	/// 0x02 // Positive Zero
11374	/// 0x04 // Negative Zero
11375	/// 0x08 // Positive Infinity
11376	/// 0x10 // Negative Infinity
11377	/// 0x20 // Denormal
11378	/// 0x40 // Negative
11379	/// 0x80 // SNaN
11380	///
11381	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
11382	#[inline]
11383	#[target_feature(enable = "avx512fp16")]
11384	#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = `0`))]
11385	#[rustc_legacy_const_generics(`1`)]
11386	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11387	pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
11388	unsafe {
11389	static_assert_uimm_bits!(IMM8, `8`);
11390	fpclass_asm!(__mmask32, zmm_reg, a)
11391	}
11392	}
11393
11394	/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11395	/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11396	/// corresponding mask bit is not set).
11397	/// imm can be a combination of:
11398	///
11399	/// 0x01 // QNaN
11400	/// 0x02 // Positive Zero
11401	/// 0x04 // Negative Zero
11402	/// 0x08 // Positive Infinity
11403	/// 0x10 // Negative Infinity
11404	/// 0x20 // Denormal
11405	/// 0x40 // Negative
11406	/// 0x80 // SNaN
11407	///
11408	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
11409	#[inline]
11410	#[target_feature(enable = "avx512fp16")]
11411	#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = `0`))]
11412	#[rustc_legacy_const_generics(`2`)]
11413	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11414	pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
11415	unsafe {
11416	static_assert_uimm_bits!(IMM8, `8`);
11417	fpclass_asm!(__mmask32, k1, zmm_reg, a)
11418	}
11419	}
11420
11421	/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11422	/// by imm8, and store the result in mask vector k.
11423	/// imm can be a combination of:
11424	///
11425	/// 0x01 // QNaN
11426	/// 0x02 // Positive Zero
11427	/// 0x04 // Negative Zero
11428	/// 0x08 // Positive Infinity
11429	/// 0x10 // Negative Infinity
11430	/// 0x20 // Denormal
11431	/// 0x40 // Negative
11432	/// 0x80 // SNaN
11433	///
11434	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
11435	#[inline]
11436	#[target_feature(enable = "avx512fp16")]
11437	#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = `0`))]
11438	#[rustc_legacy_const_generics(`1`)]
11439	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11440	pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
11441	_mm_mask_fpclass_sh_mask::<IMM8>(k1:`0xff`, a)
11442	}
11443
11444	/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11445	/// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the
11446	/// corresponding mask bit is not set).
11447	/// imm can be a combination of:
11448	///
11449	/// 0x01 // QNaN
11450	/// 0x02 // Positive Zero
11451	/// 0x04 // Negative Zero
11452	/// 0x08 // Positive Infinity
11453	/// 0x10 // Negative Infinity
11454	/// 0x20 // Denormal
11455	/// 0x40 // Negative
11456	/// 0x80 // SNaN
11457	///
11458	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
11459	#[inline]
11460	#[target_feature(enable = "avx512fp16")]
11461	#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = `0`))]
11462	#[rustc_legacy_const_generics(`2`)]
11463	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11464	pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11465	unsafe {
11466	static_assert_uimm_bits!(IMM8, `8`);
11467	vfpclasssh(a, IMM8, k:k1)
11468	}
11469	}
11470
11471	/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11472	/// and store the results in dst.
11473	///
11474	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
11475	#[inline]
11476	#[target_feature(enable = "avx512fp16,avx512vl")]
11477	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11478	pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
11479	unsafe { simd_select_bitmask(m:k, yes:b, no:a) }
11480	}
11481
11482	/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11483	/// and store the results in dst.
11484	///
11485	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
11486	#[inline]
11487	#[target_feature(enable = "avx512fp16,avx512vl")]
11488	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11489	pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
11490	unsafe { simd_select_bitmask(m:k, yes:b, no:a) }
11491	}
11492
11493	/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11494	/// and store the results in dst.
11495	///
11496	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
11497	#[inline]
11498	#[target_feature(enable = "avx512fp16")]
11499	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11500	pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
11501	unsafe { simd_select_bitmask(m:k, yes:b, no:a) }
11502	}
11503
11504	/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11505	/// and index in idx, and store the results in dst.
11506	///
11507	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
11508	#[inline]
11509	#[target_feature(enable = "avx512fp16,avx512vl")]
11510	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11511	pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
11512	_mm_castsi128_ph(_mm_permutex2var_epi16(
11513	a:_mm_castph_si128(a),
11514	idx,
11515	b:_mm_castph_si128(b),
11516	))
11517	}
11518
11519	/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11520	/// and index in idx, and store the results in dst.
11521	///
11522	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
11523	#[inline]
11524	#[target_feature(enable = "avx512fp16,avx512vl")]
11525	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11526	pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
11527	_mm256_castsi256_ph(_mm256_permutex2var_epi16(
11528	a:_mm256_castph_si256(a),
11529	idx,
11530	b:_mm256_castph_si256(b),
11531	))
11532	}
11533
11534	/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11535	/// and index in idx, and store the results in dst.
11536	///
11537	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
11538	#[inline]
11539	#[target_feature(enable = "avx512fp16")]
11540	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11541	pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
11542	_mm512_castsi512_ph(_mm512_permutex2var_epi16(
11543	a:_mm512_castph_si512(a),
11544	idx,
11545	b:_mm512_castph_si512(b),
11546	))
11547	}
11548
11549	/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11550	/// and store the results in dst.
11551	///
11552	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
11553	#[inline]
11554	#[target_feature(enable = "avx512fp16,avx512vl")]
11555	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11556	pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
11557	_mm_castsi128_ph(_mm_permutexvar_epi16(idx, a:_mm_castph_si128(a)))
11558	}
11559
11560	/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11561	/// and store the results in dst.
11562	///
11563	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
11564	#[inline]
11565	#[target_feature(enable = "avx512fp16,avx512vl")]
11566	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11567	pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
11568	_mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, a:_mm256_castph_si256(a)))
11569	}
11570
11571	/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11572	/// and store the results in dst.
11573	///
11574	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
11575	#[inline]
11576	#[target_feature(enable = "avx512fp16")]
11577	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11578	pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
11579	_mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, a:_mm512_castph_si512(a)))
11580	}
11581
11582	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11583	/// and store the results in dst.
11584	///
11585	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
11586	#[inline]
11587	#[target_feature(enable = "avx512fp16,avx512vl")]
11588	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11589	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11590	pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
11591	unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) }
11592	}
11593
11594	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11595	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11596	/// mask bit is not set).
11597	///
11598	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
11599	#[inline]
11600	#[target_feature(enable = "avx512fp16,avx512vl")]
11601	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11602	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11603	pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11604	unsafe { simd_select_bitmask(m:k, yes:_mm_cvtepi16_ph(a), no:src) }
11605	}
11606
11607	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11608	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11609	///
11610	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
11611	#[inline]
11612	#[target_feature(enable = "avx512fp16,avx512vl")]
11613	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11614	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11615	pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
11616	_mm_mask_cvtepi16_ph(src:_mm_setzero_ph(), k, a)
11617	}
11618
11619	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11620	/// and store the results in dst.
11621	///
11622	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
11623	#[inline]
11624	#[target_feature(enable = "avx512fp16,avx512vl")]
11625	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11626	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11627	pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
11628	unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) }
11629	}
11630
11631	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11632	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11633	/// mask bit is not set).
11634	///
11635	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
11636	#[inline]
11637	#[target_feature(enable = "avx512fp16,avx512vl")]
11638	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11639	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11640	pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
11641	unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepi16_ph(a), no:src) }
11642	}
11643
11644	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11645	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11646	///
11647	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
11648	#[inline]
11649	#[target_feature(enable = "avx512fp16,avx512vl")]
11650	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11651	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11652	pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
11653	_mm256_mask_cvtepi16_ph(src:_mm256_setzero_ph(), k, a)
11654	}
11655
11656	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11657	/// and store the results in dst.
11658	///
11659	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
11660	#[inline]
11661	#[target_feature(enable = "avx512fp16")]
11662	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11663	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11664	pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
11665	unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) }
11666	}
11667
11668	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11669	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11670	/// mask bit is not set).
11671	///
11672	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
11673	#[inline]
11674	#[target_feature(enable = "avx512fp16")]
11675	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11676	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11677	pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
11678	unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepi16_ph(a), no:src) }
11679	}
11680
11681	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11682	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11683	///
11684	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
11685	#[inline]
11686	#[target_feature(enable = "avx512fp16")]
11687	#[cfg_attr(test, assert_instr(vcvtw2ph))]
11688	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11689	pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
11690	_mm512_mask_cvtepi16_ph(src:_mm512_setzero_ph(), k, a)
11691	}
11692
11693	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11694	/// and store the results in dst.
11695	///
11696	/// Rounding is done according to the rounding parameter, which can be one of:
11697	///
11698	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11699	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11700	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11701	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11702	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11703	///
11704	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
11705	#[inline]
11706	#[target_feature(enable = "avx512fp16")]
11707	#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = `8`))]
11708	#[rustc_legacy_const_generics(`1`)]
11709	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11710	pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11711	unsafe {
11712	static_assert_rounding!(ROUNDING);
11713	vcvtw2ph_512(a.as_i16x32(), ROUNDING)
11714	}
11715	}
11716
11717	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11718	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11719	/// mask bit is not set).
11720	///
11721	/// Rounding is done according to the rounding parameter, which can be one of:
11722	///
11723	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11724	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11725	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11726	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11727	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11728	///
11729	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
11730	#[inline]
11731	#[target_feature(enable = "avx512fp16")]
11732	#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = `8`))]
11733	#[rustc_legacy_const_generics(`3`)]
11734	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11735	pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
11736	src: __m512h,
11737	k: __mmask32,
11738	a: __m512i,
11739	) -> __m512h {
11740	unsafe {
11741	static_assert_rounding!(ROUNDING);
11742	simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi16_ph::<ROUNDING>(a), no:src)
11743	}
11744	}
11745
11746	/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11747	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11748	///
11749	/// Rounding is done according to the rounding parameter, which can be one of:
11750	///
11751	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11752	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11753	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11754	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11755	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11756	///
11757	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
11758	#[inline]
11759	#[target_feature(enable = "avx512fp16")]
11760	#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = `8`))]
11761	#[rustc_legacy_const_generics(`2`)]
11762	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11763	pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11764	static_assert_rounding!(ROUNDING);
11765	_mm512_mask_cvt_roundepi16_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a)
11766	}
11767
11768	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11769	/// and store the results in dst.
11770	///
11771	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
11772	#[inline]
11773	#[target_feature(enable = "avx512fp16,avx512vl")]
11774	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11775	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11776	pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
11777	unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) }
11778	}
11779
11780	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11781	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11782	/// mask bit is not set).
11783	///
11784	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
11785	#[inline]
11786	#[target_feature(enable = "avx512fp16,avx512vl")]
11787	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11788	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11789	pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11790	unsafe { simd_select_bitmask(m:k, yes:_mm_cvtepu16_ph(a), no:src) }
11791	}
11792
11793	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11794	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11795	///
11796	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
11797	#[inline]
11798	#[target_feature(enable = "avx512fp16,avx512vl")]
11799	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11800	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11801	pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
11802	_mm_mask_cvtepu16_ph(src:_mm_setzero_ph(), k, a)
11803	}
11804
11805	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11806	/// and store the results in dst.
11807	///
11808	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
11809	#[inline]
11810	#[target_feature(enable = "avx512fp16,avx512vl")]
11811	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11812	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11813	pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
11814	unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) }
11815	}
11816
11817	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11818	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11819	/// mask bit is not set).
11820	///
11821	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
11822	#[inline]
11823	#[target_feature(enable = "avx512fp16,avx512vl")]
11824	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11825	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11826	pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
11827	unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepu16_ph(a), no:src) }
11828	}
11829
11830	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11831	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11832	///
11833	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
11834	#[inline]
11835	#[target_feature(enable = "avx512fp16,avx512vl")]
11836	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11837	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11838	pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
11839	_mm256_mask_cvtepu16_ph(src:_mm256_setzero_ph(), k, a)
11840	}
11841
11842	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11843	/// and store the results in dst.
11844	///
11845	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
11846	#[inline]
11847	#[target_feature(enable = "avx512fp16")]
11848	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11849	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11850	pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
11851	unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) }
11852	}
11853
11854	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11855	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11856	/// mask bit is not set).
11857	///
11858	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
11859	#[inline]
11860	#[target_feature(enable = "avx512fp16")]
11861	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11862	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11863	pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
11864	unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu16_ph(a), no:src) }
11865	}
11866
11867	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11868	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11869	///
11870	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
11871	#[inline]
11872	#[target_feature(enable = "avx512fp16")]
11873	#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11874	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11875	pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
11876	_mm512_mask_cvtepu16_ph(src:_mm512_setzero_ph(), k, a)
11877	}
11878
11879	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11880	/// and store the results in dst.
11881	///
11882	/// Rounding is done according to the rounding parameter, which can be one of:
11883	///
11884	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11885	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11886	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11887	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11888	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11889	///
11890	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
11891	#[inline]
11892	#[target_feature(enable = "avx512fp16")]
11893	#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = `8`))]
11894	#[rustc_legacy_const_generics(`1`)]
11895	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11896	pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11897	unsafe {
11898	static_assert_rounding!(ROUNDING);
11899	vcvtuw2ph_512(a.as_u16x32(), ROUNDING)
11900	}
11901	}
11902
11903	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11904	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11905	/// mask bit is not set).
11906	///
11907	/// Rounding is done according to the rounding parameter, which can be one of:
11908	///
11909	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11910	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11911	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11912	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11913	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11914	///
11915	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
11916	#[inline]
11917	#[target_feature(enable = "avx512fp16")]
11918	#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = `8`))]
11919	#[rustc_legacy_const_generics(`3`)]
11920	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11921	pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
11922	src: __m512h,
11923	k: __mmask32,
11924	a: __m512i,
11925	) -> __m512h {
11926	unsafe {
11927	static_assert_rounding!(ROUNDING);
11928	simd_select_bitmask(m:k, yes:_mm512_cvt_roundepu16_ph::<ROUNDING>(a), no:src)
11929	}
11930	}
11931
11932	/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11933	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11934	///
11935	/// Rounding is done according to the rounding parameter, which can be one of:
11936	///
11937	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11938	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11939	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11940	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11941	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11942	///
11943	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
11944	#[inline]
11945	#[target_feature(enable = "avx512fp16")]
11946	#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = `8`))]
11947	#[rustc_legacy_const_generics(`2`)]
11948	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11949	pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11950	static_assert_rounding!(ROUNDING);
11951	_mm512_mask_cvt_roundepu16_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a)
11952	}
11953
11954	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11955	/// and store the results in dst. The upper 64 bits of dst are zeroed out.
11956	///
11957	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
11958	#[inline]
11959	#[target_feature(enable = "avx512fp16,avx512vl")]
11960	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11961	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11962	pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
11963	_mm_mask_cvtepi32_ph(src:_mm_setzero_ph(), k:`0xff`, a)
11964	}
11965
11966	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11967	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11968	/// mask bit is not set). The upper 64 bits of dst are zeroed out.
11969	///
11970	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
11971	#[inline]
11972	#[target_feature(enable = "avx512fp16,avx512vl")]
11973	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11974	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11975	pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11976	unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) }
11977	}
11978
11979	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11980	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11981	/// The upper 64 bits of dst are zeroed out.
11982	///
11983	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
11984	#[inline]
11985	#[target_feature(enable = "avx512fp16,avx512vl")]
11986	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11987	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11988	pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
11989	_mm_mask_cvtepi32_ph(src:_mm_setzero_ph(), k, a)
11990	}
11991
11992	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11993	/// and store the results in dst.
11994	///
11995	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
11996	#[inline]
11997	#[target_feature(enable = "avx512fp16,avx512vl")]
11998	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11999	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12000	pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
12001	unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) }
12002	}
12003
12004	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12005	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12006	/// mask bit is not set).
12007	///
12008	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
12009	#[inline]
12010	#[target_feature(enable = "avx512fp16,avx512vl")]
12011	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12012	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12013	pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12014	unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepi32_ph(a), no:src) }
12015	}
12016
12017	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12018	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12019	///
12020	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
12021	#[inline]
12022	#[target_feature(enable = "avx512fp16,avx512vl")]
12023	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12024	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12025	pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
12026	_mm256_mask_cvtepi32_ph(src:_mm_setzero_ph(), k, a)
12027	}
12028
12029	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12030	/// and store the results in dst.
12031	///
12032	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
12033	#[inline]
12034	#[target_feature(enable = "avx512fp16")]
12035	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12036	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12037	pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
12038	unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) }
12039	}
12040
12041	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12042	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12043	/// mask bit is not set).
12044	///
12045	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
12046	#[inline]
12047	#[target_feature(enable = "avx512fp16")]
12048	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12049	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12050	pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
12051	unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepi32_ph(a), no:src) }
12052	}
12053
12054	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12055	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12056	///
12057	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
12058	#[inline]
12059	#[target_feature(enable = "avx512fp16")]
12060	#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12061	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12062	pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
12063	_mm512_mask_cvtepi32_ph(src:f16x16::ZERO.as_m256h(), k, a)
12064	}
12065
12066	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12067	/// and store the results in dst.
12068	///
12069	/// Rounding is done according to the rounding parameter, which can be one of:
12070	///
12071	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12072	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12073	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12074	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12075	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12076	///
12077	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
12078	#[inline]
12079	#[target_feature(enable = "avx512fp16")]
12080	#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = `8`))]
12081	#[rustc_legacy_const_generics(`1`)]
12082	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12083	pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12084	unsafe {
12085	static_assert_rounding!(ROUNDING);
12086	vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
12087	}
12088	}
12089
12090	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12091	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12092	/// mask bit is not set).
12093	///
12094	/// Rounding is done according to the rounding parameter, which can be one of:
12095	///
12096	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12097	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12098	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12099	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12100	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12101	///
12102	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
12103	#[inline]
12104	#[target_feature(enable = "avx512fp16")]
12105	#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = `8`))]
12106	#[rustc_legacy_const_generics(`3`)]
12107	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12108	pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
12109	src: __m256h,
12110	k: __mmask16,
12111	a: __m512i,
12112	) -> __m256h {
12113	unsafe {
12114	static_assert_rounding!(ROUNDING);
12115	simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi32_ph::<ROUNDING>(a), no:src)
12116	}
12117	}
12118
12119	/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12120	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12121	///
12122	/// Rounding is done according to the rounding parameter, which can be one of:
12123	///
12124	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12125	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12126	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12127	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12128	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12129	///
12130	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
12131	#[inline]
12132	#[target_feature(enable = "avx512fp16")]
12133	#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = `8`))]
12134	#[rustc_legacy_const_generics(`2`)]
12135	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12136	pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12137	static_assert_rounding!(ROUNDING);
12138	_mm512_mask_cvt_roundepi32_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k, a)
12139	}
12140
12141	/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12142	/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12143	/// of dst.
12144	///
12145	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
12146	#[inline]
12147	#[target_feature(enable = "avx512fp16")]
12148	#[cfg_attr(test, assert_instr(vcvtsi2sh))]
12149	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12150	pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
12151	unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12152	}
12153
12154	/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12155	/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12156	/// of dst.
12157	///
12158	/// Rounding is done according to the rounding parameter, which can be one of:
12159	///
12160	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12161	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12162	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12163	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12164	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12165	///
12166	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
12167	#[inline]
12168	#[target_feature(enable = "avx512fp16")]
12169	#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = `8`))]
12170	#[rustc_legacy_const_generics(`2`)]
12171	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12172	pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
12173	unsafe {
12174	static_assert_rounding!(ROUNDING);
12175	vcvtsi2sh(a, b, ROUNDING)
12176	}
12177	}
12178
12179	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12180	/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12181	///
12182	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
12183	#[inline]
12184	#[target_feature(enable = "avx512fp16,avx512vl")]
12185	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12186	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12187	pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
12188	_mm_mask_cvtepu32_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12189	}
12190
12191	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12192	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12193	/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12194	///
12195	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
12196	#[inline]
12197	#[target_feature(enable = "avx512fp16,avx512vl")]
12198	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12199	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12200	pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12201	unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) }
12202	}
12203
12204	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12205	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12206	/// The upper 64 bits of dst are zeroed out.
12207	///
12208	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
12209	#[inline]
12210	#[target_feature(enable = "avx512fp16,avx512vl")]
12211	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12212	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12213	pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
12214	_mm_mask_cvtepu32_ph(src:_mm_setzero_ph(), k, a)
12215	}
12216
12217	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12218	/// and store the results in dst.
12219	///
12220	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
12221	#[inline]
12222	#[target_feature(enable = "avx512fp16,avx512vl")]
12223	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12224	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12225	pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
12226	unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) }
12227	}
12228
12229	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12230	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12231	/// mask bit is not set).
12232	///
12233	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
12234	#[inline]
12235	#[target_feature(enable = "avx512fp16,avx512vl")]
12236	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12237	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12238	pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12239	unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepu32_ph(a), no:src) }
12240	}
12241
12242	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12243	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12244	///
12245	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
12246	#[inline]
12247	#[target_feature(enable = "avx512fp16,avx512vl")]
12248	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12249	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12250	pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
12251	_mm256_mask_cvtepu32_ph(src:_mm_setzero_ph(), k, a)
12252	}
12253
12254	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12255	/// and store the results in dst.
12256	///
12257	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
12258	#[inline]
12259	#[target_feature(enable = "avx512fp16")]
12260	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12261	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12262	pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
12263	unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) }
12264	}
12265
12266	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12267	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12268	/// mask bit is not set).
12269	///
12270	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
12271	#[inline]
12272	#[target_feature(enable = "avx512fp16")]
12273	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12274	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12275	pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
12276	unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu32_ph(a), no:src) }
12277	}
12278
12279	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12280	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12281	///
12282	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
12283	#[inline]
12284	#[target_feature(enable = "avx512fp16")]
12285	#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12286	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12287	pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
12288	_mm512_mask_cvtepu32_ph(src:f16x16::ZERO.as_m256h(), k, a)
12289	}
12290
12291	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12292	/// and store the results in dst.
12293	///
12294	/// Rounding is done according to the rounding parameter, which can be one of:
12295	///
12296	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12297	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12298	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12299	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12300	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12301	///
12302	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
12303	#[inline]
12304	#[target_feature(enable = "avx512fp16")]
12305	#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = `8`))]
12306	#[rustc_legacy_const_generics(`1`)]
12307	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12308	pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12309	unsafe {
12310	static_assert_rounding!(ROUNDING);
12311	vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
12312	}
12313	}
12314
12315	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12316	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12317	/// mask bit is not set).
12318	///
12319	/// Rounding is done according to the rounding parameter, which can be one of:
12320	///
12321	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12322	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12323	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12324	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12325	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12326	///
12327	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
12328	#[inline]
12329	#[target_feature(enable = "avx512fp16")]
12330	#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = `8`))]
12331	#[rustc_legacy_const_generics(`3`)]
12332	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12333	pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
12334	src: __m256h,
12335	k: __mmask16,
12336	a: __m512i,
12337	) -> __m256h {
12338	unsafe {
12339	static_assert_rounding!(ROUNDING);
12340	simd_select_bitmask(m:k, yes:_mm512_cvt_roundepu32_ph::<ROUNDING>(a), no:src)
12341	}
12342	}
12343
12344	/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12345	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12346	///
12347	/// Rounding is done according to the rounding parameter, which can be one of:
12348	///
12349	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12350	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12351	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12352	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12353	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12354	///
12355	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
12356	#[inline]
12357	#[target_feature(enable = "avx512fp16")]
12358	#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = `8`))]
12359	#[rustc_legacy_const_generics(`2`)]
12360	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12361	pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12362	static_assert_rounding!(ROUNDING);
12363	_mm512_mask_cvt_roundepu32_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k, a)
12364	}
12365
12366	/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12367	/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12368	/// of dst.
12369	///
12370	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
12371	#[inline]
12372	#[target_feature(enable = "avx512fp16")]
12373	#[cfg_attr(test, assert_instr(vcvtusi2sh))]
12374	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12375	pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
12376	unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12377	}
12378
12379	/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12380	/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12381	/// of dst.
12382	///
12383	/// Rounding is done according to the rounding parameter, which can be one of:
12384	///
12385	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12386	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12387	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12388	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12389	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12390	///
12391	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
12392	#[inline]
12393	#[target_feature(enable = "avx512fp16")]
12394	#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = `8`))]
12395	#[rustc_legacy_const_generics(`2`)]
12396	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12397	pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
12398	unsafe {
12399	static_assert_rounding!(ROUNDING);
12400	vcvtusi2sh(a, b, ROUNDING)
12401	}
12402	}
12403
12404	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12405	/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12406	///
12407	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
12408	#[inline]
12409	#[target_feature(enable = "avx512fp16,avx512vl")]
12410	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12411	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12412	pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
12413	_mm_mask_cvtepi64_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12414	}
12415
12416	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12417	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12418	/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12419	///
12420	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
12421	#[inline]
12422	#[target_feature(enable = "avx512fp16,avx512vl")]
12423	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12424	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12425	pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12426	unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) }
12427	}
12428
12429	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12430	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12431	/// The upper 96 bits of dst are zeroed out.
12432	///
12433	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
12434	#[inline]
12435	#[target_feature(enable = "avx512fp16,avx512vl")]
12436	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12437	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12438	pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
12439	_mm_mask_cvtepi64_ph(src:_mm_setzero_ph(), k, a)
12440	}
12441
12442	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12443	/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12444	///
12445	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
12446	#[inline]
12447	#[target_feature(enable = "avx512fp16,avx512vl")]
12448	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12449	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12450	pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
12451	_mm256_mask_cvtepi64_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12452	}
12453
12454	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12455	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12456	/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12457	///
12458	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
12459	#[inline]
12460	#[target_feature(enable = "avx512fp16,avx512vl")]
12461	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12462	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12463	pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12464	unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) }
12465	}
12466
12467	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12468	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12469	/// The upper 64 bits of dst are zeroed out.
12470	///
12471	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
12472	#[inline]
12473	#[target_feature(enable = "avx512fp16,avx512vl")]
12474	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12475	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12476	pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
12477	_mm256_mask_cvtepi64_ph(src:_mm_setzero_ph(), k, a)
12478	}
12479
12480	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12481	/// and store the results in dst.
12482	///
12483	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
12484	#[inline]
12485	#[target_feature(enable = "avx512fp16")]
12486	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12487	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12488	pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
12489	unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) }
12490	}
12491
12492	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12493	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12494	/// mask bit is not set).
12495	///
12496	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
12497	#[inline]
12498	#[target_feature(enable = "avx512fp16")]
12499	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12500	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12501	pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
12502	unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepi64_ph(a), no:src) }
12503	}
12504
12505	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12506	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12507	///
12508	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
12509	#[inline]
12510	#[target_feature(enable = "avx512fp16")]
12511	#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12512	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12513	pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
12514	_mm512_mask_cvtepi64_ph(src:f16x8::ZERO.as_m128h(), k, a)
12515	}
12516
12517	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12518	/// and store the results in dst.
12519	///
12520	/// Rounding is done according to the rounding parameter, which can be one of:
12521	///
12522	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12523	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12524	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12525	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12526	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12527	///
12528	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
12529	#[inline]
12530	#[target_feature(enable = "avx512fp16")]
12531	#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = `8`))]
12532	#[rustc_legacy_const_generics(`1`)]
12533	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12534	pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12535	unsafe {
12536	static_assert_rounding!(ROUNDING);
12537	vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
12538	}
12539	}
12540
12541	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12542	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12543	/// mask bit is not set).
12544	///
12545	/// Rounding is done according to the rounding parameter, which can be one of:
12546	///
12547	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12548	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12549	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12550	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12551	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12552	///
12553	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
12554	#[inline]
12555	#[target_feature(enable = "avx512fp16")]
12556	#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = `8`))]
12557	#[rustc_legacy_const_generics(`3`)]
12558	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12559	pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
12560	src: __m128h,
12561	k: __mmask8,
12562	a: __m512i,
12563	) -> __m128h {
12564	unsafe {
12565	static_assert_rounding!(ROUNDING);
12566	simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi64_ph::<ROUNDING>(a), no:src)
12567	}
12568	}
12569
12570	/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12571	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12572	///
12573	/// Rounding is done according to the rounding parameter, which can be one of:
12574	///
12575	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12576	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12577	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12578	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12579	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12580	///
12581	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
12582	#[inline]
12583	#[target_feature(enable = "avx512fp16")]
12584	#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = `8`))]
12585	#[rustc_legacy_const_generics(`2`)]
12586	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12587	pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12588	static_assert_rounding!(ROUNDING);
12589	_mm512_mask_cvt_roundepi64_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a)
12590	}
12591
12592	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12593	/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12594	///
12595	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
12596	#[inline]
12597	#[target_feature(enable = "avx512fp16,avx512vl")]
12598	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12599	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12600	pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
12601	_mm_mask_cvtepu64_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12602	}
12603
12604	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12605	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12606	/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12607	///
12608	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
12609	#[inline]
12610	#[target_feature(enable = "avx512fp16,avx512vl")]
12611	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12612	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12613	pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12614	unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) }
12615	}
12616
12617	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12618	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12619	/// The upper 96 bits of dst are zeroed out.
12620	///
12621	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
12622	#[inline]
12623	#[target_feature(enable = "avx512fp16,avx512vl")]
12624	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12625	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12626	pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
12627	_mm_mask_cvtepu64_ph(src:_mm_setzero_ph(), k, a)
12628	}
12629
12630	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12631	/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12632	///
12633	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
12634	#[inline]
12635	#[target_feature(enable = "avx512fp16,avx512vl")]
12636	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12637	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12638	pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
12639	_mm256_mask_cvtepu64_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12640	}
12641
12642	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12643	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12644	/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12645	///
12646	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
12647	#[inline]
12648	#[target_feature(enable = "avx512fp16,avx512vl")]
12649	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12650	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12651	pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12652	unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) }
12653	}
12654
12655	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12656	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12657	/// The upper 64 bits of dst are zeroed out.
12658	///
12659	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
12660	#[inline]
12661	#[target_feature(enable = "avx512fp16,avx512vl")]
12662	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12663	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12664	pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
12665	_mm256_mask_cvtepu64_ph(src:_mm_setzero_ph(), k, a)
12666	}
12667
12668	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12669	/// and store the results in dst.
12670	///
12671	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
12672	#[inline]
12673	#[target_feature(enable = "avx512fp16")]
12674	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12675	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12676	pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
12677	unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) }
12678	}
12679
12680	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12681	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12682	/// mask bit is not set).
12683	///
12684	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
12685	#[inline]
12686	#[target_feature(enable = "avx512fp16")]
12687	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12688	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12689	pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
12690	unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu64_ph(a), no:src) }
12691	}
12692
12693	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12694	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12695	///
12696	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
12697	#[inline]
12698	#[target_feature(enable = "avx512fp16")]
12699	#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12700	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12701	pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
12702	_mm512_mask_cvtepu64_ph(src:f16x8::ZERO.as_m128h(), k, a)
12703	}
12704
12705	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12706	/// and store the results in dst.
12707	///
12708	/// Rounding is done according to the rounding parameter, which can be one of:
12709	///
12710	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12711	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12712	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12713	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12714	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12715	///
12716	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
12717	#[inline]
12718	#[target_feature(enable = "avx512fp16")]
12719	#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = `8`))]
12720	#[rustc_legacy_const_generics(`1`)]
12721	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12722	pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12723	unsafe {
12724	static_assert_rounding!(ROUNDING);
12725	vcvtuqq2ph_512(a.as_u64x8(), ROUNDING)
12726	}
12727	}
12728
12729	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12730	/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12731	/// mask bit is not set).
12732	///
12733	/// Rounding is done according to the rounding parameter, which can be one of:
12734	///
12735	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12736	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12737	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12738	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12739	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12740	///
12741	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
12742	#[inline]
12743	#[target_feature(enable = "avx512fp16")]
12744	#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = `8`))]
12745	#[rustc_legacy_const_generics(`3`)]
12746	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12747	pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
12748	src: __m128h,
12749	k: __mmask8,
12750	a: __m512i,
12751	) -> __m128h {
12752	unsafe {
12753	static_assert_rounding!(ROUNDING);
12754	simd_select_bitmask(m:k, yes:_mm512_cvt_roundepu64_ph::<ROUNDING>(a), no:src)
12755	}
12756	}
12757
12758	/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12759	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12760	///
12761	/// Rounding is done according to the rounding parameter, which can be one of:
12762	///
12763	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12764	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12765	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12766	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12767	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12768	///
12769	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
12770	#[inline]
12771	#[target_feature(enable = "avx512fp16")]
12772	#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = `8`))]
12773	#[rustc_legacy_const_generics(`2`)]
12774	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12775	pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12776	static_assert_rounding!(ROUNDING);
12777	_mm512_mask_cvt_roundepu64_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a)
12778	}
12779
12780	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12781	/// floating-point elements, and store the results in dst.
12782	///
12783	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
12784	#[inline]
12785	#[target_feature(enable = "avx512fp16,avx512vl")]
12786	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12787	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12788	pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
12789	_mm_mask_cvtxps_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12790	}
12791
12792	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12793	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12794	/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12795	///
12796	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
12797	#[inline]
12798	#[target_feature(enable = "avx512fp16,avx512vl")]
12799	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12800	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12801	pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
12802	unsafe { vcvtps2phx_128(a, src, k) }
12803	}
12804
12805	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12806	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12807	/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12808	///
12809	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
12810	#[inline]
12811	#[target_feature(enable = "avx512fp16,avx512vl")]
12812	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12813	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12814	pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
12815	_mm_mask_cvtxps_ph(src:_mm_setzero_ph(), k, a)
12816	}
12817
12818	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12819	/// floating-point elements, and store the results in dst.
12820	///
12821	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
12822	#[inline]
12823	#[target_feature(enable = "avx512fp16,avx512vl")]
12824	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12825	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12826	pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
12827	_mm256_mask_cvtxps_ph(src:_mm_setzero_ph(), k:`0xff`, a)
12828	}
12829
12830	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12831	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12832	/// when the corresponding mask bit is not set).
12833	///
12834	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
12835	#[inline]
12836	#[target_feature(enable = "avx512fp16,avx512vl")]
12837	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12838	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12839	pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
12840	unsafe { vcvtps2phx_256(a, src, k) }
12841	}
12842
12843	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12844	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12845	/// corresponding mask bit is not set).
12846	///
12847	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
12848	#[inline]
12849	#[target_feature(enable = "avx512fp16,avx512vl")]
12850	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12851	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12852	pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
12853	_mm256_mask_cvtxps_ph(src:_mm_setzero_ph(), k, a)
12854	}
12855
12856	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12857	/// floating-point elements, and store the results in dst.
12858	///
12859	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
12860	#[inline]
12861	#[target_feature(enable = "avx512fp16")]
12862	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12863	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12864	pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
12865	_mm512_mask_cvtxps_ph(src:f16x16::ZERO.as_m256h(), k:`0xffff`, a)
12866	}
12867
12868	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12869	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12870	/// when the corresponding mask bit is not set).
12871	///
12872	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
12873	#[inline]
12874	#[target_feature(enable = "avx512fp16")]
12875	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12876	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12877	pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
12878	unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
12879	}
12880
12881	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12882	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12883	/// corresponding mask bit is not set).
12884	///
12885	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
12886	#[inline]
12887	#[target_feature(enable = "avx512fp16")]
12888	#[cfg_attr(test, assert_instr(vcvtps2phx))]
12889	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12890	pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
12891	_mm512_mask_cvtxps_ph(src:f16x16::ZERO.as_m256h(), k, a)
12892	}
12893
12894	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12895	/// floating-point elements, and store the results in dst.
12896	///
12897	/// Rounding is done according to the rounding parameter, which can be one of:
12898	///
12899	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12900	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12901	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12902	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12903	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12904	///
12905	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
12906	#[inline]
12907	#[target_feature(enable = "avx512fp16")]
12908	#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = `8`))]
12909	#[rustc_legacy_const_generics(`1`)]
12910	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12911	pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
12912	static_assert_rounding!(ROUNDING);
12913	_mm512_mask_cvtx_roundps_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k:`0xffff`, a)
12914	}
12915
12916	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12917	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12918	/// when the corresponding mask bit is not set).
12919	///
12920	/// Rounding is done according to the rounding parameter, which can be one of:
12921	///
12922	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12923	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12924	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12925	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12926	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12927	///
12928	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
12929	#[inline]
12930	#[target_feature(enable = "avx512fp16")]
12931	#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = `8`))]
12932	#[rustc_legacy_const_generics(`3`)]
12933	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12934	pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
12935	src: __m256h,
12936	k: __mmask16,
12937	a: __m512,
12938	) -> __m256h {
12939	unsafe {
12940	static_assert_rounding!(ROUNDING);
12941	vcvtps2phx_512(a, src, k, ROUNDING)
12942	}
12943	}
12944
12945	/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12946	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12947	/// corresponding mask bit is not set).
12948	///
12949	/// Rounding is done according to the rounding parameter, which can be one of:
12950	///
12951	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12952	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12953	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12954	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12955	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12956	///
12957	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
12958	#[inline]
12959	#[target_feature(enable = "avx512fp16")]
12960	#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = `8`))]
12961	#[rustc_legacy_const_generics(`2`)]
12962	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12963	pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
12964	static_assert_rounding!(ROUNDING);
12965	_mm512_mask_cvtx_roundps_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k, a)
12966	}
12967
12968	/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
12969	/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
12970	/// elements from a to the upper elements of dst.
12971	///
12972	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
12973	#[inline]
12974	#[target_feature(enable = "avx512fp16")]
12975	#[cfg_attr(test, assert_instr(vcvtss2sh))]
12976	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12977	pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
12978	_mm_mask_cvtss_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
12979	}
12980
12981	/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
12982	/// floating-point elements, store the result in the lower element of dst using writemask k (the element
12983	/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
12984	/// upper elements of dst.
12985	///
12986	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
12987	#[inline]
12988	#[target_feature(enable = "avx512fp16")]
12989	#[cfg_attr(test, assert_instr(vcvtss2sh))]
12990	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12991	pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
12992	unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
12993	}
12994
12995	/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
12996	/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
12997	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
12998	/// elements of dst.
12999	///
13000	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
13001	#[inline]
13002	#[target_feature(enable = "avx512fp16")]
13003	#[cfg_attr(test, assert_instr(vcvtss2sh))]
13004	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13005	pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
13006	_mm_mask_cvtss_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
13007	}
13008
13009	/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13010	/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13011	/// elements from a to the upper elements of dst.
13012	///
13013	/// Rounding is done according to the rounding parameter, which can be one of:
13014	///
13015	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13016	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13017	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13018	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13019	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13020	///
13021	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
13022	#[inline]
13023	#[target_feature(enable = "avx512fp16")]
13024	#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = `8`))]
13025	#[rustc_legacy_const_generics(`2`)]
13026	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13027	pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
13028	static_assert_rounding!(ROUNDING);
13029	_mm_mask_cvt_roundss_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
13030	}
13031
13032	/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13033	/// floating-point elements, store the result in the lower element of dst using writemask k (the element
13034	/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13035	/// upper elements of dst.
13036	///
13037	/// Rounding is done according to the rounding parameter, which can be one of:
13038	///
13039	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13040	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13041	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13042	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13043	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13044	///
13045	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
13046	#[inline]
13047	#[target_feature(enable = "avx512fp16")]
13048	#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = `8`))]
13049	#[rustc_legacy_const_generics(`4`)]
13050	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13051	pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
13052	src: __m128h,
13053	k: __mmask8,
13054	a: __m128h,
13055	b: __m128,
13056	) -> __m128h {
13057	unsafe {
13058	static_assert_rounding!(ROUNDING);
13059	vcvtss2sh(a, b, src, k, ROUNDING)
13060	}
13061	}
13062
13063	/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13064	/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13065	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13066	/// elements of dst.
13067	///
13068	/// Rounding is done according to the rounding parameter, which can be one of:
13069	///
13070	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13071	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13072	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13073	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13074	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13075	///
13076	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
13077	#[inline]
13078	#[target_feature(enable = "avx512fp16")]
13079	#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = `8`))]
13080	#[rustc_legacy_const_generics(`3`)]
13081	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13082	pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
13083	k: __mmask8,
13084	a: __m128h,
13085	b: __m128,
13086	) -> __m128h {
13087	static_assert_rounding!(ROUNDING);
13088	_mm_mask_cvt_roundss_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
13089	}
13090
13091	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13092	/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out.
13093	///
13094	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
13095	#[inline]
13096	#[target_feature(enable = "avx512fp16,avx512vl")]
13097	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13098	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13099	pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
13100	_mm_mask_cvtpd_ph(src:_mm_setzero_ph(), k:`0xff`, a)
13101	}
13102
13103	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13104	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13105	/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13106	///
13107	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
13108	#[inline]
13109	#[target_feature(enable = "avx512fp16,avx512vl")]
13110	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13111	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13112	pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
13113	unsafe { vcvtpd2ph_128(a, src, k) }
13114	}
13115
13116	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13117	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13118	/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13119	///
13120	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
13121	#[inline]
13122	#[target_feature(enable = "avx512fp16,avx512vl")]
13123	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13124	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13125	pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
13126	_mm_mask_cvtpd_ph(src:_mm_setzero_ph(), k, a)
13127	}
13128
13129	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13130	/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
13131	///
13132	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
13133	#[inline]
13134	#[target_feature(enable = "avx512fp16,avx512vl")]
13135	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13136	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13137	pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
13138	_mm256_mask_cvtpd_ph(src:_mm_setzero_ph(), k:`0xff`, a)
13139	}
13140
13141	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13142	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13143	/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13144	///
13145	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
13146	#[inline]
13147	#[target_feature(enable = "avx512fp16,avx512vl")]
13148	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13149	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13150	pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
13151	unsafe { vcvtpd2ph_256(a, src, k) }
13152	}
13153
13154	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13155	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13156	/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13157	///
13158	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
13159	#[inline]
13160	#[target_feature(enable = "avx512fp16,avx512vl")]
13161	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13162	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13163	pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
13164	_mm256_mask_cvtpd_ph(src:_mm_setzero_ph(), k, a)
13165	}
13166
13167	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13168	/// floating-point elements, and store the results in dst.
13169	///
13170	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
13171	#[inline]
13172	#[target_feature(enable = "avx512fp16")]
13173	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13174	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13175	pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
13176	_mm512_mask_cvtpd_ph(src:f16x8::ZERO.as_m128h(), k:`0xff`, a)
13177	}
13178
13179	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13180	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13181	/// when the corresponding mask bit is not set).
13182	///
13183	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
13184	#[inline]
13185	#[target_feature(enable = "avx512fp16")]
13186	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13187	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13188	pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
13189	unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
13190	}
13191
13192	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13193	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13194	/// corresponding mask bit is not set).
13195	///
13196	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
13197	#[inline]
13198	#[target_feature(enable = "avx512fp16")]
13199	#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13200	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13201	pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
13202	_mm512_mask_cvtpd_ph(src:f16x8::ZERO.as_m128h(), k, a)
13203	}
13204
13205	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13206	/// floating-point elements, and store the results in dst.
13207	///
13208	/// Rounding is done according to the rounding parameter, which can be one of:
13209	///
13210	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13211	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13212	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13213	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13214	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13215	///
13216	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
13217	#[inline]
13218	#[target_feature(enable = "avx512fp16")]
13219	#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = `8`))]
13220	#[rustc_legacy_const_generics(`1`)]
13221	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13222	pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
13223	static_assert_rounding!(ROUNDING);
13224	_mm512_mask_cvt_roundpd_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a)
13225	}
13226
13227	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13228	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13229	/// when the corresponding mask bit is not set).
13230	///
13231	/// Rounding is done according to the rounding parameter, which can be one of:
13232	///
13233	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13234	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13235	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13236	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13237	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13238	///
13239	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
13240	#[inline]
13241	#[target_feature(enable = "avx512fp16")]
13242	#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = `8`))]
13243	#[rustc_legacy_const_generics(`3`)]
13244	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13245	pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
13246	src: __m128h,
13247	k: __mmask8,
13248	a: __m512d,
13249	) -> __m128h {
13250	unsafe {
13251	static_assert_rounding!(ROUNDING);
13252	vcvtpd2ph_512(a, src, k, ROUNDING)
13253	}
13254	}
13255
13256	/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13257	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13258	/// corresponding mask bit is not set).
13259	///
13260	/// Rounding is done according to the rounding parameter, which can be one of:
13261	///
13262	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13263	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13264	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13265	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13266	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13267	///
13268	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
13269	#[inline]
13270	#[target_feature(enable = "avx512fp16")]
13271	#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = `8`))]
13272	#[rustc_legacy_const_generics(`2`)]
13273	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13274	pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
13275	static_assert_rounding!(ROUNDING);
13276	_mm512_mask_cvt_roundpd_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a)
13277	}
13278
13279	/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13280	/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13281	/// elements from a to the upper elements of dst.
13282	///
13283	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
13284	#[inline]
13285	#[target_feature(enable = "avx512fp16")]
13286	#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13287	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13288	pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
13289	_mm_mask_cvtsd_sh(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
13290	}
13291
13292	/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13293	/// floating-point elements, store the result in the lower element of dst using writemask k (the element
13294	/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13295	/// upper elements of dst.
13296	///
13297	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
13298	#[inline]
13299	#[target_feature(enable = "avx512fp16")]
13300	#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13301	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13302	pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
13303	unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
13304	}
13305
13306	/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13307	/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13308	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13309	/// elements of dst.
13310	///
13311	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
13312	#[inline]
13313	#[target_feature(enable = "avx512fp16")]
13314	#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13315	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13316	pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
13317	_mm_mask_cvtsd_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
13318	}
13319
13320	/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13321	/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13322	/// elements from a to the upper elements of dst.
13323	///
13324	/// Rounding is done according to the rounding parameter, which can be one of:
13325	///
13326	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13327	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13328	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13329	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13330	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13331	///
13332	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
13333	#[inline]
13334	#[target_feature(enable = "avx512fp16")]
13335	#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = `8`))]
13336	#[rustc_legacy_const_generics(`2`)]
13337	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13338	pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
13339	static_assert_rounding!(ROUNDING);
13340	_mm_mask_cvt_roundsd_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:`0xff`, a, b)
13341	}
13342
13343	/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13344	/// floating-point elements, store the result in the lower element of dst using writemask k (the element
13345	/// if copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13346	/// upper elements of dst.
13347	///
13348	/// Rounding is done according to the rounding parameter, which can be one of:
13349	///
13350	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13351	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13352	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13353	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13354	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13355	///
13356	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
13357	#[inline]
13358	#[target_feature(enable = "avx512fp16")]
13359	#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = `8`))]
13360	#[rustc_legacy_const_generics(`4`)]
13361	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13362	pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
13363	src: __m128h,
13364	k: __mmask8,
13365	a: __m128h,
13366	b: __m128d,
13367	) -> __m128h {
13368	unsafe {
13369	static_assert_rounding!(ROUNDING);
13370	vcvtsd2sh(a, b, src, k, ROUNDING)
13371	}
13372	}
13373
13374	/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13375	/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13376	/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13377	/// elements of dst.
13378	///
13379	/// Rounding is done according to the rounding parameter, which can be one of:
13380	///
13381	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13382	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13383	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13384	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13385	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13386	///
13387	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
13388	#[inline]
13389	#[target_feature(enable = "avx512fp16")]
13390	#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = `8`))]
13391	#[rustc_legacy_const_generics(`3`)]
13392	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13393	pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
13394	k: __mmask8,
13395	a: __m128h,
13396	b: __m128d,
13397	) -> __m128h {
13398	static_assert_rounding!(ROUNDING);
13399	_mm_mask_cvt_roundsd_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
13400	}
13401
13402	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13403	/// store the results in dst.
13404	///
13405	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
13406	#[inline]
13407	#[target_feature(enable = "avx512fp16,avx512vl")]
13408	#[cfg_attr(test, assert_instr(vcvtph2w))]
13409	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13410	pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
13411	_mm_mask_cvtph_epi16(src:_mm_undefined_si128(), k:`0xff`, a)
13412	}
13413
13414	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13415	/// store the results in dst using writemask k (elements are copied from src when the corresponding
13416	/// mask bit is not set).
13417	///
13418	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
13419	#[inline]
13420	#[target_feature(enable = "avx512fp16,avx512vl")]
13421	#[cfg_attr(test, assert_instr(vcvtph2w))]
13422	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13423	pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13424	unsafe { transmute(src:vcvtph2w_128(a, src.as_i16x8(), k)) }
13425	}
13426
13427	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13428	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13429	///
13430	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
13431	#[inline]
13432	#[target_feature(enable = "avx512fp16,avx512vl")]
13433	#[cfg_attr(test, assert_instr(vcvtph2w))]
13434	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13435	pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
13436	_mm_mask_cvtph_epi16(src:_mm_setzero_si128(), k, a)
13437	}
13438
13439	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13440	/// store the results in dst.
13441	///
13442	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
13443	#[inline]
13444	#[target_feature(enable = "avx512fp16,avx512vl")]
13445	#[cfg_attr(test, assert_instr(vcvtph2w))]
13446	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13447	pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
13448	_mm256_mask_cvtph_epi16(src:_mm256_undefined_si256(), k:`0xffff`, a)
13449	}
13450
13451	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13452	/// store the results in dst using writemask k (elements are copied from src when the corresponding
13453	/// mask bit is not set).
13454	///
13455	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
13456	#[inline]
13457	#[target_feature(enable = "avx512fp16,avx512vl")]
13458	#[cfg_attr(test, assert_instr(vcvtph2w))]
13459	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13460	pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
13461	unsafe { transmute(src:vcvtph2w_256(a, src.as_i16x16(), k)) }
13462	}
13463
13464	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13465	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13466	///
13467	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
13468	#[inline]
13469	#[target_feature(enable = "avx512fp16,avx512vl")]
13470	#[cfg_attr(test, assert_instr(vcvtph2w))]
13471	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13472	pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
13473	_mm256_mask_cvtph_epi16(src:_mm256_setzero_si256(), k, a)
13474	}
13475
13476	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13477	/// store the results in dst.
13478	///
13479	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
13480	#[inline]
13481	#[target_feature(enable = "avx512fp16")]
13482	#[cfg_attr(test, assert_instr(vcvtph2w))]
13483	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13484	pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
13485	_mm512_mask_cvtph_epi16(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
13486	}
13487
13488	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13489	/// store the results in dst using writemask k (elements are copied from src when the corresponding
13490	/// mask bit is not set).
13491	///
13492	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
13493	#[inline]
13494	#[target_feature(enable = "avx512fp16")]
13495	#[cfg_attr(test, assert_instr(vcvtph2w))]
13496	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13497	pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13498	unsafe {
13499	transmute(src:vcvtph2w_512(
13500	a,
13501	src.as_i16x32(),
13502	k,
13503	_MM_FROUND_CUR_DIRECTION,
13504	))
13505	}
13506	}
13507
13508	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13509	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13510	///
13511	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
13512	#[inline]
13513	#[target_feature(enable = "avx512fp16")]
13514	#[cfg_attr(test, assert_instr(vcvtph2w))]
13515	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13516	pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
13517	_mm512_mask_cvtph_epi16(src:_mm512_setzero_si512(), k, a)
13518	}
13519
13520	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13521	/// store the results in dst.
13522	///
13523	/// Rounding is done according to the rounding parameter, which can be one of:
13524	///
13525	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13526	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13527	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13528	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13529	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13530	///
13531	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
13532	#[inline]
13533	#[target_feature(enable = "avx512fp16")]
13534	#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = `8`))]
13535	#[rustc_legacy_const_generics(`1`)]
13536	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13537	pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
13538	static_assert_rounding!(ROUNDING);
13539	_mm512_mask_cvt_roundph_epi16::<ROUNDING>(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
13540	}
13541
13542	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13543	/// store the results in dst using writemask k (elements are copied from src when the corresponding
13544	/// mask bit is not set).
13545	///
13546	/// Rounding is done according to the rounding parameter, which can be one of:
13547	///
13548	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13549	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13550	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13551	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13552	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13553	///
13554	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
13555	#[inline]
13556	#[target_feature(enable = "avx512fp16")]
13557	#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = `8`))]
13558	#[rustc_legacy_const_generics(`3`)]
13559	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13560	pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
13561	src: __m512i,
13562	k: __mmask32,
13563	a: __m512h,
13564	) -> __m512i {
13565	unsafe {
13566	static_assert_rounding!(ROUNDING);
13567	transmute(src:vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
13568	}
13569	}
13570
13571	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13572	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13573	///
13574	/// Rounding is done according to the rounding parameter, which can be one of:
13575	///
13576	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13577	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13578	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13579	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13580	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13581	///
13582	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
13583	#[inline]
13584	#[target_feature(enable = "avx512fp16")]
13585	#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = `8`))]
13586	#[rustc_legacy_const_generics(`2`)]
13587	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13588	pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
13589	static_assert_rounding!(ROUNDING);
13590	_mm512_mask_cvt_roundph_epi16::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
13591	}
13592
13593	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13594	/// and store the results in dst.
13595	///
13596	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
13597	#[inline]
13598	#[target_feature(enable = "avx512fp16,avx512vl")]
13599	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13600	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13601	pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
13602	_mm_mask_cvtph_epu16(src:_mm_undefined_si128(), k:`0xff`, a)
13603	}
13604
13605	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13606	/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13607	/// mask bit is not set).
13608	///
13609	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
13610	#[inline]
13611	#[target_feature(enable = "avx512fp16,avx512vl")]
13612	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13613	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13614	pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13615	unsafe { transmute(src:vcvtph2uw_128(a, src.as_u16x8(), k)) }
13616	}
13617
13618	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13619	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13620	///
13621	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
13622	#[inline]
13623	#[target_feature(enable = "avx512fp16,avx512vl")]
13624	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13625	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13626	pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
13627	_mm_mask_cvtph_epu16(src:_mm_setzero_si128(), k, a)
13628	}
13629
13630	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13631	/// and store the results in dst.
13632	///
13633	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
13634	#[inline]
13635	#[target_feature(enable = "avx512fp16,avx512vl")]
13636	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13637	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13638	pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
13639	_mm256_mask_cvtph_epu16(src:_mm256_undefined_si256(), k:`0xffff`, a)
13640	}
13641
13642	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13643	/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13644	/// mask bit is not set).
13645	///
13646	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
13647	#[inline]
13648	#[target_feature(enable = "avx512fp16,avx512vl")]
13649	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13650	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13651	pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
13652	unsafe { transmute(src:vcvtph2uw_256(a, src.as_u16x16(), k)) }
13653	}
13654
13655	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13656	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13657	///
13658	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
13659	#[inline]
13660	#[target_feature(enable = "avx512fp16,avx512vl")]
13661	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13662	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13663	pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
13664	_mm256_mask_cvtph_epu16(src:_mm256_setzero_si256(), k, a)
13665	}
13666
13667	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13668	/// and store the results in dst.
13669	///
13670	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
13671	#[inline]
13672	#[target_feature(enable = "avx512fp16")]
13673	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13674	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13675	pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
13676	_mm512_mask_cvtph_epu16(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
13677	}
13678
13679	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13680	/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13681	/// mask bit is not set).
13682	///
13683	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
13684	#[inline]
13685	#[target_feature(enable = "avx512fp16")]
13686	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13687	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13688	pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13689	unsafe {
13690	transmute(src:vcvtph2uw_512(
13691	a,
13692	src.as_u16x32(),
13693	k,
13694	_MM_FROUND_CUR_DIRECTION,
13695	))
13696	}
13697	}
13698
13699	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13700	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13701	///
13702	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
13703	#[inline]
13704	#[target_feature(enable = "avx512fp16")]
13705	#[cfg_attr(test, assert_instr(vcvtph2uw))]
13706	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13707	pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
13708	_mm512_mask_cvtph_epu16(src:_mm512_setzero_si512(), k, a)
13709	}
13710
13711	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13712	/// and store the results in dst.
13713	///
13714	/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13715	///
13716	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
13717	#[inline]
13718	#[target_feature(enable = "avx512fp16")]
13719	#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = `8`))]
13720	#[rustc_legacy_const_generics(`1`)]
13721	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13722	pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
13723	static_assert_sae!(SAE);
13724	_mm512_mask_cvt_roundph_epu16::<SAE>(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
13725	}
13726
13727	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13728	/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13729	/// mask bit is not set).
13730	///
13731	/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13732	///
13733	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
13734	#[inline]
13735	#[target_feature(enable = "avx512fp16")]
13736	#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = `8`))]
13737	#[rustc_legacy_const_generics(`3`)]
13738	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13739	pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
13740	src: __m512i,
13741	k: __mmask32,
13742	a: __m512h,
13743	) -> __m512i {
13744	unsafe {
13745	static_assert_sae!(SAE);
13746	transmute(src:vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
13747	}
13748	}
13749
13750	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13751	/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13752	///
13753	/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13754	///
13755	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
13756	#[inline]
13757	#[target_feature(enable = "avx512fp16")]
13758	#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = `8`))]
13759	#[rustc_legacy_const_generics(`2`)]
13760	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13761	pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
13762	static_assert_sae!(SAE);
13763	_mm512_mask_cvt_roundph_epu16::<SAE>(src:_mm512_setzero_si512(), k, a)
13764	}
13765
13766	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13767	/// truncation, and store the results in dst.
13768	///
13769	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
13770	#[inline]
13771	#[target_feature(enable = "avx512fp16,avx512vl")]
13772	#[cfg_attr(test, assert_instr(vcvttph2w))]
13773	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13774	pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
13775	_mm_mask_cvttph_epi16(src:_mm_undefined_si128(), k:`0xff`, a)
13776	}
13777
13778	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13779	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13780	/// mask bit is not set).
13781	///
13782	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
13783	#[inline]
13784	#[target_feature(enable = "avx512fp16,avx512vl")]
13785	#[cfg_attr(test, assert_instr(vcvttph2w))]
13786	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13787	pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13788	unsafe { transmute(src:vcvttph2w_128(a, src.as_i16x8(), k)) }
13789	}
13790
13791	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13792	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13793	/// mask bit is not set).
13794	///
13795	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
13796	#[inline]
13797	#[target_feature(enable = "avx512fp16,avx512vl")]
13798	#[cfg_attr(test, assert_instr(vcvttph2w))]
13799	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13800	pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
13801	_mm_mask_cvttph_epi16(src:_mm_setzero_si128(), k, a)
13802	}
13803
13804	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13805	/// truncation, and store the results in dst.
13806	///
13807	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
13808	#[inline]
13809	#[target_feature(enable = "avx512fp16,avx512vl")]
13810	#[cfg_attr(test, assert_instr(vcvttph2w))]
13811	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13812	pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
13813	_mm256_mask_cvttph_epi16(src:_mm256_undefined_si256(), k:`0xffff`, a)
13814	}
13815
13816	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13817	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13818	/// mask bit is not set).
13819	///
13820	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
13821	#[inline]
13822	#[target_feature(enable = "avx512fp16,avx512vl")]
13823	#[cfg_attr(test, assert_instr(vcvttph2w))]
13824	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13825	pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
13826	unsafe { transmute(src:vcvttph2w_256(a, src.as_i16x16(), k)) }
13827	}
13828
13829	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13830	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13831	/// mask bit is not set).
13832	///
13833	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
13834	#[inline]
13835	#[target_feature(enable = "avx512fp16,avx512vl")]
13836	#[cfg_attr(test, assert_instr(vcvttph2w))]
13837	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13838	pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
13839	_mm256_mask_cvttph_epi16(src:_mm256_setzero_si256(), k, a)
13840	}
13841
13842	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13843	/// truncation, and store the results in dst.
13844	///
13845	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
13846	#[inline]
13847	#[target_feature(enable = "avx512fp16")]
13848	#[cfg_attr(test, assert_instr(vcvttph2w))]
13849	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13850	pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
13851	_mm512_mask_cvttph_epi16(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
13852	}
13853
13854	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13855	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13856	/// mask bit is not set).
13857	///
13858	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
13859	#[inline]
13860	#[target_feature(enable = "avx512fp16")]
13861	#[cfg_attr(test, assert_instr(vcvttph2w))]
13862	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13863	pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13864	unsafe {
13865	transmute(src:vcvttph2w_512(
13866	a,
13867	src.as_i16x32(),
13868	k,
13869	_MM_FROUND_CUR_DIRECTION,
13870	))
13871	}
13872	}
13873
13874	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13875	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13876	/// mask bit is not set).
13877	///
13878	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
13879	#[inline]
13880	#[target_feature(enable = "avx512fp16")]
13881	#[cfg_attr(test, assert_instr(vcvttph2w))]
13882	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13883	pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
13884	_mm512_mask_cvttph_epi16(src:_mm512_setzero_si512(), k, a)
13885	}
13886
13887	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13888	/// truncation, and store the results in dst.
13889	///
13890	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13891	///
13892	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
13893	#[inline]
13894	#[target_feature(enable = "avx512fp16")]
13895	#[cfg_attr(test, assert_instr(vcvttph2w, SAE = `8`))]
13896	#[rustc_legacy_const_generics(`1`)]
13897	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13898	pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
13899	static_assert_sae!(SAE);
13900	_mm512_mask_cvtt_roundph_epi16::<SAE>(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
13901	}
13902
13903	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13904	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13905	/// mask bit is not set).
13906	///
13907	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13908	///
13909	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
13910	#[inline]
13911	#[target_feature(enable = "avx512fp16")]
13912	#[cfg_attr(test, assert_instr(vcvttph2w, SAE = `8`))]
13913	#[rustc_legacy_const_generics(`3`)]
13914	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13915	pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
13916	src: __m512i,
13917	k: __mmask32,
13918	a: __m512h,
13919	) -> __m512i {
13920	unsafe {
13921	static_assert_sae!(SAE);
13922	transmute(src:vcvttph2w_512(a, src.as_i16x32(), k, SAE))
13923	}
13924	}
13925
13926	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13927	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13928	/// mask bit is not set).
13929	///
13930	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13931	///
13932	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
13933	#[inline]
13934	#[target_feature(enable = "avx512fp16")]
13935	#[cfg_attr(test, assert_instr(vcvttph2w, SAE = `8`))]
13936	#[rustc_legacy_const_generics(`2`)]
13937	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13938	pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
13939	static_assert_sae!(SAE);
13940	_mm512_mask_cvtt_roundph_epi16::<SAE>(src:_mm512_setzero_si512(), k, a)
13941	}
13942
13943	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13944	/// truncation, and store the results in dst.
13945	///
13946	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
13947	#[inline]
13948	#[target_feature(enable = "avx512fp16,avx512vl")]
13949	#[cfg_attr(test, assert_instr(vcvttph2uw))]
13950	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13951	pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
13952	_mm_mask_cvttph_epu16(src:_mm_undefined_si128(), k:`0xff`, a)
13953	}
13954
13955	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13956	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13957	/// mask bit is not set).
13958	///
13959	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
13960	#[inline]
13961	#[target_feature(enable = "avx512fp16,avx512vl")]
13962	#[cfg_attr(test, assert_instr(vcvttph2uw))]
13963	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13964	pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13965	unsafe { transmute(src:vcvttph2uw_128(a, src.as_u16x8(), k)) }
13966	}
13967
13968	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13969	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13970	/// mask bit is not set).
13971	///
13972	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
13973	#[inline]
13974	#[target_feature(enable = "avx512fp16,avx512vl")]
13975	#[cfg_attr(test, assert_instr(vcvttph2uw))]
13976	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13977	pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
13978	_mm_mask_cvttph_epu16(src:_mm_setzero_si128(), k, a)
13979	}
13980
13981	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13982	/// truncation, and store the results in dst.
13983	///
13984	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
13985	#[inline]
13986	#[target_feature(enable = "avx512fp16,avx512vl")]
13987	#[cfg_attr(test, assert_instr(vcvttph2uw))]
13988	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13989	pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
13990	_mm256_mask_cvttph_epu16(src:_mm256_undefined_si256(), k:`0xffff`, a)
13991	}
13992
13993	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13994	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13995	/// mask bit is not set).
13996	///
13997	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
13998	#[inline]
13999	#[target_feature(enable = "avx512fp16,avx512vl")]
14000	#[cfg_attr(test, assert_instr(vcvttph2uw))]
14001	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14002	pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
14003	unsafe { transmute(src:vcvttph2uw_256(a, src.as_u16x16(), k)) }
14004	}
14005
14006	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14007	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14008	/// mask bit is not set).
14009	///
14010	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
14011	#[inline]
14012	#[target_feature(enable = "avx512fp16,avx512vl")]
14013	#[cfg_attr(test, assert_instr(vcvttph2uw))]
14014	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14015	pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
14016	_mm256_mask_cvttph_epu16(src:_mm256_setzero_si256(), k, a)
14017	}
14018
14019	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14020	/// truncation, and store the results in dst.
14021	///
14022	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
14023	#[inline]
14024	#[target_feature(enable = "avx512fp16")]
14025	#[cfg_attr(test, assert_instr(vcvttph2uw))]
14026	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14027	pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
14028	_mm512_mask_cvttph_epu16(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
14029	}
14030
14031	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14032	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14033	/// mask bit is not set).
14034	///
14035	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
14036	#[inline]
14037	#[target_feature(enable = "avx512fp16")]
14038	#[cfg_attr(test, assert_instr(vcvttph2uw))]
14039	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14040	pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
14041	unsafe {
14042	transmute(src:vcvttph2uw_512(
14043	a,
14044	src.as_u16x32(),
14045	k,
14046	_MM_FROUND_CUR_DIRECTION,
14047	))
14048	}
14049	}
14050
14051	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14052	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14053	/// mask bit is not set).
14054	///
14055	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
14056	#[inline]
14057	#[target_feature(enable = "avx512fp16")]
14058	#[cfg_attr(test, assert_instr(vcvttph2uw))]
14059	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14060	pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
14061	_mm512_mask_cvttph_epu16(src:_mm512_setzero_si512(), k, a)
14062	}
14063
14064	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14065	/// truncation, and store the results in dst.
14066	///
14067	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14068	///
14069	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
14070	#[inline]
14071	#[target_feature(enable = "avx512fp16")]
14072	#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = `8`))]
14073	#[rustc_legacy_const_generics(`1`)]
14074	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14075	pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
14076	static_assert_sae!(SAE);
14077	_mm512_mask_cvtt_roundph_epu16::<SAE>(src:_mm512_undefined_epi32(), k:`0xffffffff`, a)
14078	}
14079
14080	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14081	/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14082	/// mask bit is not set).
14083	///
14084	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14085	///
14086	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
14087	#[inline]
14088	#[target_feature(enable = "avx512fp16")]
14089	#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = `8`))]
14090	#[rustc_legacy_const_generics(`3`)]
14091	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14092	pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
14093	src: __m512i,
14094	k: __mmask32,
14095	a: __m512h,
14096	) -> __m512i {
14097	unsafe {
14098	static_assert_sae!(SAE);
14099	transmute(src:vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
14100	}
14101	}
14102
14103	/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14104	/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14105	/// mask bit is not set).
14106	///
14107	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14108	///
14109	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
14110	#[inline]
14111	#[target_feature(enable = "avx512fp16")]
14112	#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = `8`))]
14113	#[rustc_legacy_const_generics(`2`)]
14114	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14115	pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14116	static_assert_sae!(SAE);
14117	_mm512_mask_cvtt_roundph_epu16::<SAE>(src:_mm512_setzero_si512(), k, a)
14118	}
14119
14120	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14121	/// results in dst.
14122	///
14123	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
14124	#[inline]
14125	#[target_feature(enable = "avx512fp16,avx512vl")]
14126	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14127	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14128	pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
14129	_mm_mask_cvtph_epi32(src:_mm_undefined_si128(), k:`0xff`, a)
14130	}
14131
14132	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14133	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14134	///
14135	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
14136	#[inline]
14137	#[target_feature(enable = "avx512fp16,avx512vl")]
14138	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14139	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14140	pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14141	unsafe { transmute(src:vcvtph2dq_128(a, src.as_i32x4(), k)) }
14142	}
14143
14144	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14145	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14146	///
14147	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
14148	#[inline]
14149	#[target_feature(enable = "avx512fp16,avx512vl")]
14150	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14151	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14152	pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
14153	_mm_mask_cvtph_epi32(src:_mm_setzero_si128(), k, a)
14154	}
14155
14156	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14157	/// results in dst.
14158	///
14159	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
14160	#[inline]
14161	#[target_feature(enable = "avx512fp16,avx512vl")]
14162	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14163	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14164	pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
14165	_mm256_mask_cvtph_epi32(src:_mm256_undefined_si256(), k:`0xff`, a)
14166	}
14167
14168	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14169	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14170	///
14171	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
14172	#[inline]
14173	#[target_feature(enable = "avx512fp16,avx512vl")]
14174	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14175	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14176	pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14177	unsafe { transmute(src:vcvtph2dq_256(a, src.as_i32x8(), k)) }
14178	}
14179
14180	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14181	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182	///
14183	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
14184	#[inline]
14185	#[target_feature(enable = "avx512fp16,avx512vl")]
14186	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14187	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14188	pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
14189	_mm256_mask_cvtph_epi32(src:_mm256_setzero_si256(), k, a)
14190	}
14191
14192	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14193	/// results in dst.
14194	///
14195	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
14196	#[inline]
14197	#[target_feature(enable = "avx512fp16")]
14198	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14199	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14200	pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
14201	_mm512_mask_cvtph_epi32(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14202	}
14203
14204	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14205	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14206	///
14207	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
14208	#[inline]
14209	#[target_feature(enable = "avx512fp16")]
14210	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14211	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14212	pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14213	unsafe {
14214	transmute(src:vcvtph2dq_512(
14215	a,
14216	src.as_i32x16(),
14217	k,
14218	_MM_FROUND_CUR_DIRECTION,
14219	))
14220	}
14221	}
14222
14223	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14224	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14225	///
14226	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
14227	#[inline]
14228	#[target_feature(enable = "avx512fp16")]
14229	#[cfg_attr(test, assert_instr(vcvtph2dq))]
14230	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14231	pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
14232	_mm512_mask_cvtph_epi32(src:_mm512_setzero_si512(), k, a)
14233	}
14234
14235	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14236	/// results in dst.
14237	///
14238	/// Rounding is done according to the rounding parameter, which can be one of:
14239	///
14240	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14241	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14242	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14243	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14244	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14245	///
14246	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
14247	#[inline]
14248	#[target_feature(enable = "avx512fp16")]
14249	#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = `8`))]
14250	#[rustc_legacy_const_generics(`1`)]
14251	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14252	pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14253	static_assert_rounding!(ROUNDING);
14254	_mm512_mask_cvt_roundph_epi32::<ROUNDING>(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14255	}
14256
14257	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14258	/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14259	///
14260	/// Rounding is done according to the rounding parameter, which can be one of:
14261	///
14262	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14263	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14264	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14265	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14266	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14267	///
14268	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
14269	#[inline]
14270	#[target_feature(enable = "avx512fp16")]
14271	#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = `8`))]
14272	#[rustc_legacy_const_generics(`3`)]
14273	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14274	pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
14275	src: __m512i,
14276	k: __mmask16,
14277	a: __m256h,
14278	) -> __m512i {
14279	unsafe {
14280	static_assert_rounding!(ROUNDING);
14281	transmute(src:vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
14282	}
14283	}
14284
14285	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14286	/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14287	///
14288	/// Rounding is done according to the rounding parameter, which can be one of:
14289	///
14290	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14291	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14292	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14293	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14294	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14295	///
14296	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
14297	#[inline]
14298	#[target_feature(enable = "avx512fp16")]
14299	#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = `8`))]
14300	#[rustc_legacy_const_generics(`2`)]
14301	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14302	pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14303	static_assert_rounding!(ROUNDING);
14304	_mm512_mask_cvt_roundph_epi32::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
14305	}
14306
14307	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14308	/// the result in dst.
14309	///
14310	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
14311	#[inline]
14312	#[target_feature(enable = "avx512fp16")]
14313	#[cfg_attr(test, assert_instr(vcvtsh2si))]
14314	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14315	pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
14316	unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14317	}
14318
14319	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14320	/// the result in dst.
14321	///
14322	/// Rounding is done according to the rounding parameter, which can be one of:
14323	///
14324	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14325	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14326	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14327	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14328	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14329	///
14330	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
14331	#[inline]
14332	#[target_feature(enable = "avx512fp16")]
14333	#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = `8`))]
14334	#[rustc_legacy_const_generics(`1`)]
14335	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14336	pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
14337	unsafe {
14338	static_assert_rounding!(ROUNDING);
14339	vcvtsh2si32(a, ROUNDING)
14340	}
14341	}
14342
14343	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14344	/// results in dst.
14345	///
14346	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
14347	#[inline]
14348	#[target_feature(enable = "avx512fp16,avx512vl")]
14349	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14350	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14351	pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
14352	_mm_mask_cvtph_epu32(src:_mm_undefined_si128(), k:`0xff`, a)
14353	}
14354
14355	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14356	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14357	///
14358	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
14359	#[inline]
14360	#[target_feature(enable = "avx512fp16,avx512vl")]
14361	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14362	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14363	pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14364	unsafe { transmute(src:vcvtph2udq_128(a, src.as_u32x4(), k)) }
14365	}
14366
14367	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14368	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14369	///
14370	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
14371	#[inline]
14372	#[target_feature(enable = "avx512fp16,avx512vl")]
14373	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14374	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14375	pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14376	_mm_mask_cvtph_epu32(src:_mm_setzero_si128(), k, a)
14377	}
14378
14379	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14380	/// the results in dst.
14381	///
14382	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
14383	#[inline]
14384	#[target_feature(enable = "avx512fp16,avx512vl")]
14385	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14386	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14387	pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
14388	_mm256_mask_cvtph_epu32(src:_mm256_undefined_si256(), k:`0xff`, a)
14389	}
14390
14391	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14392	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14393	///
14394	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
14395	#[inline]
14396	#[target_feature(enable = "avx512fp16,avx512vl")]
14397	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14398	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14399	pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14400	unsafe { transmute(src:vcvtph2udq_256(a, src.as_u32x8(), k)) }
14401	}
14402
14403	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14404	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14405	///
14406	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
14407	#[inline]
14408	#[target_feature(enable = "avx512fp16,avx512vl")]
14409	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14410	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14411	pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14412	_mm256_mask_cvtph_epu32(src:_mm256_setzero_si256(), k, a)
14413	}
14414
14415	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14416	/// the results in dst.
14417	///
14418	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
14419	#[inline]
14420	#[target_feature(enable = "avx512fp16")]
14421	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14422	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14423	pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
14424	_mm512_mask_cvtph_epu32(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14425	}
14426
14427	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14428	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14429	///
14430	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
14431	#[inline]
14432	#[target_feature(enable = "avx512fp16")]
14433	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14434	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14435	pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14436	unsafe {
14437	transmute(src:vcvtph2udq_512(
14438	a,
14439	src.as_u32x16(),
14440	k,
14441	_MM_FROUND_CUR_DIRECTION,
14442	))
14443	}
14444	}
14445
14446	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14447	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14448	///
14449	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
14450	#[inline]
14451	#[target_feature(enable = "avx512fp16")]
14452	#[cfg_attr(test, assert_instr(vcvtph2udq))]
14453	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14454	pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14455	_mm512_mask_cvtph_epu32(src:_mm512_setzero_si512(), k, a)
14456	}
14457
14458	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14459	/// the results in dst.
14460	///
14461	/// Rounding is done according to the rounding parameter, which can be one of:
14462	///
14463	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14464	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14465	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14466	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14467	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14468	///
14469	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
14470	#[inline]
14471	#[target_feature(enable = "avx512fp16")]
14472	#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = `8`))]
14473	#[rustc_legacy_const_generics(`1`)]
14474	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14475	pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14476	static_assert_rounding!(ROUNDING);
14477	_mm512_mask_cvt_roundph_epu32::<ROUNDING>(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14478	}
14479
14480	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14481	/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14482	///
14483	/// Rounding is done according to the rounding parameter, which can be one of:
14484	///
14485	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14486	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14487	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14488	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14489	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14490	///
14491	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
14492	#[inline]
14493	#[target_feature(enable = "avx512fp16")]
14494	#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = `8`))]
14495	#[rustc_legacy_const_generics(`3`)]
14496	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14497	pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
14498	src: __m512i,
14499	k: __mmask16,
14500	a: __m256h,
14501	) -> __m512i {
14502	unsafe {
14503	static_assert_rounding!(ROUNDING);
14504	transmute(src:vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
14505	}
14506	}
14507
14508	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14509	/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14510	///
14511	/// Rounding is done according to the rounding parameter, which can be one of:
14512	///
14513	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14514	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14515	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14516	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14517	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14518	///
14519	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
14520	#[inline]
14521	#[target_feature(enable = "avx512fp16")]
14522	#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = `8`))]
14523	#[rustc_legacy_const_generics(`2`)]
14524	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14525	pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14526	static_assert_rounding!(ROUNDING);
14527	_mm512_mask_cvt_roundph_epu32::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
14528	}
14529
14530	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
14531	/// the result in dst.
14532	///
14533	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
14534	#[inline]
14535	#[target_feature(enable = "avx512fp16")]
14536	#[cfg_attr(test, assert_instr(vcvtsh2usi))]
14537	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14538	pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
14539	unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
14540	}
14541
14542	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
14543	/// the result in dst.
14544	///
14545	/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14546	///
14547	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
14548	#[inline]
14549	#[target_feature(enable = "avx512fp16")]
14550	#[cfg_attr(test, assert_instr(vcvtsh2usi, SAE = `8`))]
14551	#[rustc_legacy_const_generics(`1`)]
14552	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14553	pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
14554	unsafe {
14555	static_assert_rounding!(SAE);
14556	vcvtsh2usi32(a, SAE)
14557	}
14558	}
14559
14560	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14561	/// store the results in dst.
14562	///
14563	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
14564	#[inline]
14565	#[target_feature(enable = "avx512fp16,avx512vl")]
14566	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14567	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14568	pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
14569	_mm_mask_cvttph_epi32(src:_mm_undefined_si128(), k:`0xff`, a)
14570	}
14571
14572	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14573	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14574	///
14575	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
14576	#[inline]
14577	#[target_feature(enable = "avx512fp16,avx512vl")]
14578	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14579	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14580	pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14581	unsafe { transmute(src:vcvttph2dq_128(a, src.as_i32x4(), k)) }
14582	}
14583
14584	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14585	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14586	///
14587	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
14588	#[inline]
14589	#[target_feature(enable = "avx512fp16,avx512vl")]
14590	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14591	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14592	pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
14593	_mm_mask_cvttph_epi32(src:_mm_setzero_si128(), k, a)
14594	}
14595
14596	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14597	/// store the results in dst.
14598	///
14599	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
14600	#[inline]
14601	#[target_feature(enable = "avx512fp16,avx512vl")]
14602	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14603	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14604	pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
14605	_mm256_mask_cvttph_epi32(src:_mm256_undefined_si256(), k:`0xff`, a)
14606	}
14607
14608	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14609	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14610	///
14611	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
14612	#[inline]
14613	#[target_feature(enable = "avx512fp16,avx512vl")]
14614	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14615	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14616	pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14617	unsafe { transmute(src:vcvttph2dq_256(a, src.as_i32x8(), k)) }
14618	}
14619
14620	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14621	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14622	///
14623	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
14624	#[inline]
14625	#[target_feature(enable = "avx512fp16,avx512vl")]
14626	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14627	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14628	pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
14629	_mm256_mask_cvttph_epi32(src:_mm256_setzero_si256(), k, a)
14630	}
14631
14632	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14633	/// store the results in dst.
14634	///
14635	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
14636	#[inline]
14637	#[target_feature(enable = "avx512fp16")]
14638	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14639	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14640	pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
14641	_mm512_mask_cvttph_epi32(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14642	}
14643
14644	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14645	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14646	///
14647	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
14648	#[inline]
14649	#[target_feature(enable = "avx512fp16")]
14650	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14651	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14652	pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14653	unsafe {
14654	transmute(src:vcvttph2dq_512(
14655	a,
14656	src.as_i32x16(),
14657	k,
14658	_MM_FROUND_CUR_DIRECTION,
14659	))
14660	}
14661	}
14662
14663	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14664	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14665	///
14666	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
14667	#[inline]
14668	#[target_feature(enable = "avx512fp16")]
14669	#[cfg_attr(test, assert_instr(vcvttph2dq))]
14670	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14671	pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
14672	_mm512_mask_cvttph_epi32(src:_mm512_setzero_si512(), k, a)
14673	}
14674
14675	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14676	/// store the results in dst.
14677	///
14678	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14679	///
14680	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
14681	#[inline]
14682	#[target_feature(enable = "avx512fp16")]
14683	#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = `8`))]
14684	#[rustc_legacy_const_generics(`1`)]
14685	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14686	pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
14687	static_assert_sae!(SAE);
14688	_mm512_mask_cvtt_roundph_epi32::<SAE>(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14689	}
14690
14691	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14692	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14693	///
14694	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14695	///
14696	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
14697	#[inline]
14698	#[target_feature(enable = "avx512fp16")]
14699	#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = `8`))]
14700	#[rustc_legacy_const_generics(`3`)]
14701	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14702	pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
14703	src: __m512i,
14704	k: __mmask16,
14705	a: __m256h,
14706	) -> __m512i {
14707	unsafe {
14708	static_assert_sae!(SAE);
14709	transmute(src:vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
14710	}
14711	}
14712
14713	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14714	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14715	///
14716	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14717	///
14718	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
14719	#[inline]
14720	#[target_feature(enable = "avx512fp16")]
14721	#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = `8`))]
14722	#[rustc_legacy_const_generics(`2`)]
14723	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14724	pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
14725	static_assert_sae!(SAE);
14726	_mm512_mask_cvtt_roundph_epi32::<SAE>(src:_mm512_setzero_si512(), k, a)
14727	}
14728
14729	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
14730	/// the result in dst.
14731	///
14732	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
14733	#[inline]
14734	#[target_feature(enable = "avx512fp16")]
14735	#[cfg_attr(test, assert_instr(vcvttsh2si))]
14736	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14737	pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
14738	unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14739	}
14740
14741	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
14742	/// the result in dst.
14743	///
14744	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14745	///
14746	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
14747	#[inline]
14748	#[target_feature(enable = "avx512fp16")]
14749	#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = `8`))]
14750	#[rustc_legacy_const_generics(`1`)]
14751	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14752	pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
14753	unsafe {
14754	static_assert_sae!(SAE);
14755	vcvttsh2si32(a, SAE)
14756	}
14757	}
14758
14759	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14760	/// store the results in dst.
14761	///
14762	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
14763	#[inline]
14764	#[target_feature(enable = "avx512fp16,avx512vl")]
14765	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14766	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14767	pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
14768	_mm_mask_cvttph_epu32(src:_mm_undefined_si128(), k:`0xff`, a)
14769	}
14770
14771	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14772	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14773	///
14774	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
14775	#[inline]
14776	#[target_feature(enable = "avx512fp16,avx512vl")]
14777	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14778	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14779	pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14780	unsafe { transmute(src:vcvttph2udq_128(a, src.as_u32x4(), k)) }
14781	}
14782
14783	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14784	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14785	///
14786	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
14787	#[inline]
14788	#[target_feature(enable = "avx512fp16,avx512vl")]
14789	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14790	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14791	pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14792	_mm_mask_cvttph_epu32(src:_mm_setzero_si128(), k, a)
14793	}
14794
14795	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14796	/// store the results in dst.
14797	///
14798	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
14799	#[inline]
14800	#[target_feature(enable = "avx512fp16,avx512vl")]
14801	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14802	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14803	pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
14804	_mm256_mask_cvttph_epu32(src:_mm256_undefined_si256(), k:`0xff`, a)
14805	}
14806
14807	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14808	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14809	///
14810	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
14811	#[inline]
14812	#[target_feature(enable = "avx512fp16,avx512vl")]
14813	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14814	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14815	pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14816	unsafe { transmute(src:vcvttph2udq_256(a, src.as_u32x8(), k)) }
14817	}
14818
14819	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14820	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14821	///
14822	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
14823	#[inline]
14824	#[target_feature(enable = "avx512fp16,avx512vl")]
14825	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14826	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14827	pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14828	_mm256_mask_cvttph_epu32(src:_mm256_setzero_si256(), k, a)
14829	}
14830
14831	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14832	/// store the results in dst.
14833	///
14834	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
14835	#[inline]
14836	#[target_feature(enable = "avx512fp16")]
14837	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14838	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14839	pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
14840	_mm512_mask_cvttph_epu32(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14841	}
14842
14843	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14844	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14845	///
14846	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
14847	#[inline]
14848	#[target_feature(enable = "avx512fp16")]
14849	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14850	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14851	pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14852	unsafe {
14853	transmute(src:vcvttph2udq_512(
14854	a,
14855	src.as_u32x16(),
14856	k,
14857	_MM_FROUND_CUR_DIRECTION,
14858	))
14859	}
14860	}
14861
14862	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14863	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14864	///
14865	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
14866	#[inline]
14867	#[target_feature(enable = "avx512fp16")]
14868	#[cfg_attr(test, assert_instr(vcvttph2udq))]
14869	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14870	pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14871	_mm512_mask_cvttph_epu32(src:_mm512_setzero_si512(), k, a)
14872	}
14873
14874	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14875	/// store the results in dst.
14876	///
14877	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14878	///
14879	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
14880	#[inline]
14881	#[target_feature(enable = "avx512fp16")]
14882	#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = `8`))]
14883	#[rustc_legacy_const_generics(`1`)]
14884	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14885	pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
14886	static_assert_sae!(SAE);
14887	_mm512_mask_cvtt_roundph_epu32::<SAE>(src:_mm512_undefined_epi32(), k:`0xffff`, a)
14888	}
14889
14890	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14891	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14892	///
14893	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14894	///
14895	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
14896	#[inline]
14897	#[target_feature(enable = "avx512fp16")]
14898	#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = `8`))]
14899	#[rustc_legacy_const_generics(`3`)]
14900	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14901	pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
14902	src: __m512i,
14903	k: __mmask16,
14904	a: __m256h,
14905	) -> __m512i {
14906	unsafe {
14907	static_assert_sae!(SAE);
14908	transmute(src:vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
14909	}
14910	}
14911
14912	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14913	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14914	///
14915	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14916	///
14917	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
14918	#[inline]
14919	#[target_feature(enable = "avx512fp16")]
14920	#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = `8`))]
14921	#[rustc_legacy_const_generics(`2`)]
14922	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14923	pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
14924	static_assert_sae!(SAE);
14925	_mm512_mask_cvtt_roundph_epu32::<SAE>(src:_mm512_setzero_si512(), k, a)
14926	}
14927
14928	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
14929	/// the result in dst.
14930	///
14931	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
14932	#[inline]
14933	#[target_feature(enable = "avx512fp16")]
14934	#[cfg_attr(test, assert_instr(vcvttsh2usi))]
14935	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14936	pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
14937	unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
14938	}
14939
14940	/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
14941	/// the result in dst.
14942	///
14943	/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14944	///
14945	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
14946	#[inline]
14947	#[target_feature(enable = "avx512fp16")]
14948	#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = `8`))]
14949	#[rustc_legacy_const_generics(`1`)]
14950	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14951	pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
14952	unsafe {
14953	static_assert_sae!(SAE);
14954	vcvttsh2usi32(a, SAE)
14955	}
14956	}
14957
14958	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14959	/// store the results in dst.
14960	///
14961	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
14962	#[inline]
14963	#[target_feature(enable = "avx512fp16,avx512vl")]
14964	#[cfg_attr(test, assert_instr(vcvtph2qq))]
14965	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14966	pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
14967	_mm_mask_cvtph_epi64(src:_mm_undefined_si128(), k:`0xff`, a)
14968	}
14969
14970	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14971	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14972	///
14973	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
14974	#[inline]
14975	#[target_feature(enable = "avx512fp16,avx512vl")]
14976	#[cfg_attr(test, assert_instr(vcvtph2qq))]
14977	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14978	pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14979	unsafe { transmute(src:vcvtph2qq_128(a, src.as_i64x2(), k)) }
14980	}
14981
14982	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14983	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14984	///
14985	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
14986	#[inline]
14987	#[target_feature(enable = "avx512fp16,avx512vl")]
14988	#[cfg_attr(test, assert_instr(vcvtph2qq))]
14989	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14990	pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
14991	_mm_mask_cvtph_epi64(src:_mm_setzero_si128(), k, a)
14992	}
14993
14994	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14995	/// store the results in dst.
14996	///
14997	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
14998	#[inline]
14999	#[target_feature(enable = "avx512fp16,avx512vl")]
15000	#[cfg_attr(test, assert_instr(vcvtph2qq))]
15001	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15002	pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
15003	_mm256_mask_cvtph_epi64(src:_mm256_undefined_si256(), k:`0xff`, a)
15004	}
15005
15006	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15007	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15008	///
15009	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
15010	#[inline]
15011	#[target_feature(enable = "avx512fp16,avx512vl")]
15012	#[cfg_attr(test, assert_instr(vcvtph2qq))]
15013	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15014	pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15015	unsafe { transmute(src:vcvtph2qq_256(a, src.as_i64x4(), k)) }
15016	}
15017
15018	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15019	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15020	///
15021	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
15022	#[inline]
15023	#[target_feature(enable = "avx512fp16,avx512vl")]
15024	#[cfg_attr(test, assert_instr(vcvtph2qq))]
15025	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15026	pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15027	_mm256_mask_cvtph_epi64(src:_mm256_setzero_si256(), k, a)
15028	}
15029
15030	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15031	/// store the results in dst.
15032	///
15033	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
15034	#[inline]
15035	#[target_feature(enable = "avx512fp16")]
15036	#[cfg_attr(test, assert_instr(vcvtph2qq))]
15037	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15038	pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
15039	_mm512_mask_cvtph_epi64(src:_mm512_undefined_epi32(), k:`0xff`, a)
15040	}
15041
15042	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15043	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15044	///
15045	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
15046	#[inline]
15047	#[target_feature(enable = "avx512fp16")]
15048	#[cfg_attr(test, assert_instr(vcvtph2qq))]
15049	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15050	pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15051	unsafe {
15052	transmute(src:vcvtph2qq_512(
15053	a,
15054	src.as_i64x8(),
15055	k,
15056	_MM_FROUND_CUR_DIRECTION,
15057	))
15058	}
15059	}
15060
15061	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15062	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15063	///
15064	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
15065	#[inline]
15066	#[target_feature(enable = "avx512fp16")]
15067	#[cfg_attr(test, assert_instr(vcvtph2qq))]
15068	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15069	pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15070	_mm512_mask_cvtph_epi64(src:_mm512_setzero_si512(), k, a)
15071	}
15072
15073	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15074	/// store the results in dst.
15075	///
15076	/// Rounding is done according to the rounding parameter, which can be one of:
15077	///
15078	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15079	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15080	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15081	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15082	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15083	///
15084	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
15085	#[inline]
15086	#[target_feature(enable = "avx512fp16")]
15087	#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = `8`))]
15088	#[rustc_legacy_const_generics(`1`)]
15089	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15090	pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15091	static_assert_rounding!(ROUNDING);
15092	_mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_undefined_epi32(), k:`0xff`, a)
15093	}
15094
15095	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15096	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15097	///
15098	/// Rounding is done according to the rounding parameter, which can be one of:
15099	///
15100	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15101	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15102	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15103	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15104	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15105	///
15106	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
15107	#[inline]
15108	#[target_feature(enable = "avx512fp16")]
15109	#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = `8`))]
15110	#[rustc_legacy_const_generics(`3`)]
15111	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15112	pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
15113	src: __m512i,
15114	k: __mmask8,
15115	a: __m128h,
15116	) -> __m512i {
15117	unsafe {
15118	static_assert_rounding!(ROUNDING);
15119	transmute(src:vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15120	}
15121	}
15122
15123	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15124	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15125	///
15126	/// Rounding is done according to the rounding parameter, which can be one of:
15127	///
15128	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15129	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15130	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15131	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15132	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15133	///
15134	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
15135	#[inline]
15136	#[target_feature(enable = "avx512fp16")]
15137	#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = `8`))]
15138	#[rustc_legacy_const_generics(`2`)]
15139	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15140	pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15141	static_assert_rounding!(ROUNDING);
15142	_mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15143	}
15144
15145	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15146	/// store the results in dst.
15147	///
15148	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
15149	#[inline]
15150	#[target_feature(enable = "avx512fp16,avx512vl")]
15151	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15152	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15153	pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
15154	_mm_mask_cvtph_epu64(src:_mm_undefined_si128(), k:`0xff`, a)
15155	}
15156
15157	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15158	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15159	///
15160	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
15161	#[inline]
15162	#[target_feature(enable = "avx512fp16,avx512vl")]
15163	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15164	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15165	pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15166	unsafe { transmute(src:vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15167	}
15168
15169	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15170	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15171	///
15172	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
15173	#[inline]
15174	#[target_feature(enable = "avx512fp16,avx512vl")]
15175	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15176	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15177	pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15178	_mm_mask_cvtph_epu64(src:_mm_setzero_si128(), k, a)
15179	}
15180
15181	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15182	/// store the results in dst.
15183	///
15184	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
15185	#[inline]
15186	#[target_feature(enable = "avx512fp16,avx512vl")]
15187	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15188	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15189	pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
15190	_mm256_mask_cvtph_epu64(src:_mm256_undefined_si256(), k:`0xff`, a)
15191	}
15192
15193	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15194	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15195	///
15196	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
15197	#[inline]
15198	#[target_feature(enable = "avx512fp16,avx512vl")]
15199	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15200	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15201	pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15202	unsafe { transmute(src:vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15203	}
15204
15205	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15206	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15207	///
15208	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
15209	#[inline]
15210	#[target_feature(enable = "avx512fp16,avx512vl")]
15211	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15212	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15213	pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15214	_mm256_mask_cvtph_epu64(src:_mm256_setzero_si256(), k, a)
15215	}
15216
15217	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15218	/// store the results in dst.
15219	///
15220	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
15221	#[inline]
15222	#[target_feature(enable = "avx512fp16")]
15223	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15224	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15225	pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
15226	_mm512_mask_cvtph_epu64(src:_mm512_undefined_epi32(), k:`0xff`, a)
15227	}
15228
15229	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15230	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15231	///
15232	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
15233	#[inline]
15234	#[target_feature(enable = "avx512fp16")]
15235	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15236	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15237	pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15238	unsafe {
15239	transmute(src:vcvtph2uqq_512(
15240	a,
15241	src.as_u64x8(),
15242	k,
15243	_MM_FROUND_CUR_DIRECTION,
15244	))
15245	}
15246	}
15247
15248	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15249	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15250	///
15251	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
15252	#[inline]
15253	#[target_feature(enable = "avx512fp16")]
15254	#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15255	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15256	pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15257	_mm512_mask_cvtph_epu64(src:_mm512_setzero_si512(), k, a)
15258	}
15259
15260	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15261	/// store the results in dst.
15262	///
15263	/// Rounding is done according to the rounding parameter, which can be one of:
15264	///
15265	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15266	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15267	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15268	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15269	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15270	///
15271	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
15272	#[inline]
15273	#[target_feature(enable = "avx512fp16")]
15274	#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = `8`))]
15275	#[rustc_legacy_const_generics(`1`)]
15276	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15277	pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15278	static_assert_rounding!(ROUNDING);
15279	_mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_undefined_epi32(), k:`0xff`, a)
15280	}
15281
15282	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15283	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15284	///
15285	/// Rounding is done according to the rounding parameter, which can be one of:
15286	///
15287	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15288	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15289	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15290	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15291	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15292	///
15293	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
15294	#[inline]
15295	#[target_feature(enable = "avx512fp16")]
15296	#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = `8`))]
15297	#[rustc_legacy_const_generics(`3`)]
15298	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15299	pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
15300	src: __m512i,
15301	k: __mmask8,
15302	a: __m128h,
15303	) -> __m512i {
15304	unsafe {
15305	static_assert_rounding!(ROUNDING);
15306	transmute(src:vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15307	}
15308	}
15309
15310	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15311	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15312	///
15313	/// Rounding is done according to the rounding parameter, which can be one of:
15314	///
15315	/// * [`_MM_FROUND_TO_NEAREST_INT`] \| [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15316	/// * [`_MM_FROUND_TO_NEG_INF`] \| [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15317	/// * [`_MM_FROUND_TO_POS_INF`] \| [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15318	/// * [`_MM_FROUND_TO_ZERO`] \| [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15319	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15320	///
15321	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
15322	#[inline]
15323	#[target_feature(enable = "avx512fp16")]
15324	#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = `8`))]
15325	#[rustc_legacy_const_generics(`2`)]
15326	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15327	pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15328	static_assert_rounding!(ROUNDING);
15329	_mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15330	}
15331
15332	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15333	/// store the results in dst.
15334	///
15335	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
15336	#[inline]
15337	#[target_feature(enable = "avx512fp16,avx512vl")]
15338	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15339	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15340	pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
15341	_mm_mask_cvttph_epi64(src:_mm_undefined_si128(), k:`0xff`, a)
15342	}
15343
15344	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15345	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15346	///
15347	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
15348	#[inline]
15349	#[target_feature(enable = "avx512fp16,avx512vl")]
15350	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15351	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15352	pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15353	unsafe { transmute(src:vcvttph2qq_128(a, src.as_i64x2(), k)) }
15354	}
15355
15356	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15357	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15358	///
15359	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
15360	#[inline]
15361	#[target_feature(enable = "avx512fp16,avx512vl")]
15362	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15363	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15364	pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
15365	_mm_mask_cvttph_epi64(src:_mm_setzero_si128(), k, a)
15366	}
15367
15368	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15369	/// store the results in dst.
15370	///
15371	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
15372	#[inline]
15373	#[target_feature(enable = "avx512fp16,avx512vl")]
15374	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15375	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15376	pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
15377	_mm256_mask_cvttph_epi64(src:_mm256_undefined_si256(), k:`0xff`, a)
15378	}
15379
15380	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15381	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15382	///
15383	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
15384	#[inline]
15385	#[target_feature(enable = "avx512fp16,avx512vl")]
15386	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15387	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15388	pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15389	unsafe { transmute(src:vcvttph2qq_256(a, src.as_i64x4(), k)) }
15390	}
15391
15392	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15393	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15394	///
15395	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
15396	#[inline]
15397	#[target_feature(enable = "avx512fp16,avx512vl")]
15398	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15399	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15400	pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15401	_mm256_mask_cvttph_epi64(src:_mm256_setzero_si256(), k, a)
15402	}
15403
15404	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15405	/// store the results in dst.
15406	///
15407	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
15408	#[inline]
15409	#[target_feature(enable = "avx512fp16")]
15410	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15411	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15412	pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
15413	_mm512_mask_cvttph_epi64(src:_mm512_undefined_epi32(), k:`0xff`, a)
15414	}
15415
15416	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15417	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15418	///
15419	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
15420	#[inline]
15421	#[target_feature(enable = "avx512fp16")]
15422	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15423	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15424	pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15425	unsafe {
15426	transmute(src:vcvttph2qq_512(
15427	a,
15428	src.as_i64x8(),
15429	k,
15430	_MM_FROUND_CUR_DIRECTION,
15431	))
15432	}
15433	}
15434
15435	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15436	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15437	///
15438	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
15439	#[inline]
15440	#[target_feature(enable = "avx512fp16")]
15441	#[cfg_attr(test, assert_instr(vcvttph2qq))]
15442	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15443	pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15444	_mm512_mask_cvttph_epi64(src:_mm512_setzero_si512(), k, a)
15445	}
15446
15447	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15448	/// store the results in dst.
15449	///
15450	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15451	///
15452	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
15453	#[inline]
15454	#[target_feature(enable = "avx512fp16")]
15455	#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = `8`))]
15456	#[rustc_legacy_const_generics(`1`)]
15457	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15458	pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
15459	static_assert_sae!(SAE);
15460	_mm512_mask_cvtt_roundph_epi64::<SAE>(src:_mm512_undefined_epi32(), k:`0xff`, a)
15461	}
15462
15463	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15464	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15465	///
15466	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15467	///
15468	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
15469	#[inline]
15470	#[target_feature(enable = "avx512fp16")]
15471	#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = `8`))]
15472	#[rustc_legacy_const_generics(`3`)]
15473	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15474	pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
15475	src: __m512i,
15476	k: __mmask8,
15477	a: __m128h,
15478	) -> __m512i {
15479	unsafe {
15480	static_assert_sae!(SAE);
15481	transmute(src:vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
15482	}
15483	}
15484
15485	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15486	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15487	///
15488	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15489	///
15490	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
15491	#[inline]
15492	#[target_feature(enable = "avx512fp16")]
15493	#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = `8`))]
15494	#[rustc_legacy_const_generics(`2`)]
15495	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15496	pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15497	static_assert_sae!(SAE);
15498	_mm512_mask_cvtt_roundph_epi64::<SAE>(src:_mm512_setzero_si512(), k, a)
15499	}
15500
15501	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15502	/// store the results in dst.
15503	///
15504	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
15505	#[inline]
15506	#[target_feature(enable = "avx512fp16,avx512vl")]
15507	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15508	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15509	pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
15510	_mm_mask_cvttph_epu64(src:_mm_undefined_si128(), k:`0xff`, a)
15511	}
15512
15513	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15514	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15515	///
15516	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
15517	#[inline]
15518	#[target_feature(enable = "avx512fp16,avx512vl")]
15519	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15520	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15521	pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15522	unsafe { transmute(src:vcvttph2uqq_128(a, src.as_u64x2(), k)) }
15523	}
15524
15525	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15526	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15527	///
15528	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
15529	#[inline]
15530	#[target_feature(enable = "avx512fp16,avx512vl")]
15531	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15532	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15533	pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15534	_mm_mask_cvttph_epu64(src:_mm_setzero_si128(), k, a)
15535	}
15536
15537	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15538	/// store the results in dst.
15539	///
15540	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
15541	#[inline]
15542	#[target_feature(enable = "avx512fp16,avx512vl")]
15543	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15544	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15545	pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
15546	_mm256_mask_cvttph_epu64(src:_mm256_undefined_si256(), k:`0xff`, a)
15547	}
15548
15549	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15550	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15551	///
15552	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
15553	#[inline]
15554	#[target_feature(enable = "avx512fp16,avx512vl")]
15555	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15556	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15557	pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15558	unsafe { transmute(src:vcvttph2uqq_256(a, src.as_u64x4(), k)) }
15559	}
15560
15561	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15562	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15563	///
15564	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
15565	#[inline]
15566	#[target_feature(enable = "avx512fp16,avx512vl")]
15567	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15568	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15569	pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15570	_mm256_mask_cvttph_epu64(src:_mm256_setzero_si256(), k, a)
15571	}
15572
15573	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15574	/// store the results in dst.
15575	///
15576	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
15577	#[inline]
15578	#[target_feature(enable = "avx512fp16")]
15579	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15580	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15581	pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
15582	_mm512_mask_cvttph_epu64(src:_mm512_undefined_epi32(), k:`0xff`, a)
15583	}
15584
15585	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15586	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15587	///
15588	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
15589	#[inline]
15590	#[target_feature(enable = "avx512fp16")]
15591	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15592	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15593	pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15594	unsafe {
15595	transmute(src:vcvttph2uqq_512(
15596	a,
15597	src.as_u64x8(),
15598	k,
15599	_MM_FROUND_CUR_DIRECTION,
15600	))
15601	}
15602	}
15603
15604	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15605	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15606	///
15607	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
15608	#[inline]
15609	#[target_feature(enable = "avx512fp16")]
15610	#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15611	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15612	pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15613	_mm512_mask_cvttph_epu64(src:_mm512_setzero_si512(), k, a)
15614	}
15615
15616	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15617	/// store the results in dst.
15618	///
15619	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15620	///
15621	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
15622	#[inline]
15623	#[target_feature(enable = "avx512fp16")]
15624	#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = `8`))]
15625	#[rustc_legacy_const_generics(`1`)]
15626	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15627	pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
15628	static_assert_sae!(SAE);
15629	_mm512_mask_cvtt_roundph_epu64::<SAE>(src:_mm512_undefined_epi32(), k:`0xff`, a)
15630	}
15631
15632	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15633	/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15634	///
15635	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15636	///
15637	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
15638	#[inline]
15639	#[target_feature(enable = "avx512fp16")]
15640	#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = `8`))]
15641	#[rustc_legacy_const_generics(`3`)]
15642	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15643	pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
15644	src: __m512i,
15645	k: __mmask8,
15646	a: __m128h,
15647	) -> __m512i {
15648	unsafe {
15649	static_assert_sae!(SAE);
15650	transmute(src:vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15651	}
15652	}
15653
15654	/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15655	/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15656	///
15657	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15658	///
15659	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
15660	#[inline]
15661	#[target_feature(enable = "avx512fp16")]
15662	#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = `8`))]
15663	#[rustc_legacy_const_generics(`2`)]
15664	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15665	pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15666	static_assert_sae!(SAE);
15667	_mm512_mask_cvtt_roundph_epu64::<SAE>(src:_mm512_setzero_si512(), k, a)
15668	}
15669
15670	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15671	/// floating-point elements, and store the results in dst.
15672	///
15673	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
15674	#[inline]
15675	#[target_feature(enable = "avx512fp16,avx512vl")]
15676	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15677	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15678	pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
15679	_mm_mask_cvtxph_ps(src:_mm_setzero_ps(), k:`0xff`, a)
15680	}
15681
15682	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15683	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15684	/// dst when the corresponding mask bit is not set).
15685	///
15686	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
15687	#[inline]
15688	#[target_feature(enable = "avx512fp16,avx512vl")]
15689	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15690	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15691	pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
15692	unsafe { vcvtph2psx_128(a, src, k) }
15693	}
15694
15695	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15696	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15697	/// corresponding mask bit is not set).
15698	///
15699	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
15700	#[inline]
15701	#[target_feature(enable = "avx512fp16,avx512vl")]
15702	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15703	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15704	pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
15705	_mm_mask_cvtxph_ps(src:_mm_setzero_ps(), k, a)
15706	}
15707
15708	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15709	/// floating-point elements, and store the results in dst.
15710	///
15711	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
15712	#[inline]
15713	#[target_feature(enable = "avx512fp16,avx512vl")]
15714	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15715	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15716	pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
15717	_mm256_mask_cvtxph_ps(src:_mm256_setzero_ps(), k:`0xff`, a)
15718	}
15719
15720	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15721	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15722	/// dst when the corresponding mask bit is not set).
15723	///
15724	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
15725	#[inline]
15726	#[target_feature(enable = "avx512fp16,avx512vl")]
15727	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15728	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15729	pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
15730	unsafe { vcvtph2psx_256(a, src, k) }
15731	}
15732
15733	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15734	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15735	/// corresponding mask bit is not set).
15736	///
15737	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
15738	#[inline]
15739	#[target_feature(enable = "avx512fp16,avx512vl")]
15740	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15741	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15742	pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
15743	_mm256_mask_cvtxph_ps(src:_mm256_setzero_ps(), k, a)
15744	}
15745
15746	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15747	/// floating-point elements, and store the results in dst.
15748	///
15749	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
15750	#[inline]
15751	#[target_feature(enable = "avx512fp16")]
15752	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15753	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15754	pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
15755	_mm512_mask_cvtxph_ps(src:_mm512_setzero_ps(), k:`0xffff`, a)
15756	}
15757
15758	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15759	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15760	/// dst when the corresponding mask bit is not set).
15761	///
15762	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
15763	#[inline]
15764	#[target_feature(enable = "avx512fp16")]
15765	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15766	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15767	pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
15768	unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
15769	}
15770
15771	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15772	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15773	/// corresponding mask bit is not set).
15774	///
15775	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
15776	#[inline]
15777	#[target_feature(enable = "avx512fp16")]
15778	#[cfg_attr(test, assert_instr(vcvtph2psx))]
15779	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15780	pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
15781	_mm512_mask_cvtxph_ps(src:_mm512_setzero_ps(), k, a)
15782	}
15783
15784	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15785	/// floating-point elements, and store the results in dst.
15786	///
15787	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15788	///
15789	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
15790	#[inline]
15791	#[target_feature(enable = "avx512fp16")]
15792	#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = `8`))]
15793	#[rustc_legacy_const_generics(`1`)]
15794	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15795	pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
15796	static_assert_sae!(SAE);
15797	_mm512_mask_cvtx_roundph_ps::<SAE>(src:_mm512_setzero_ps(), k:`0xffff`, a)
15798	}
15799
15800	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15801	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15802	/// dst when the corresponding mask bit is not set).
15803	///
15804	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15805	///
15806	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
15807	#[inline]
15808	#[target_feature(enable = "avx512fp16")]
15809	#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = `8`))]
15810	#[rustc_legacy_const_generics(`3`)]
15811	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15812	pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
15813	src: __m512,
15814	k: __mmask16,
15815	a: __m256h,
15816	) -> __m512 {
15817	unsafe {
15818	static_assert_sae!(SAE);
15819	vcvtph2psx_512(a, src, k, SAE)
15820	}
15821	}
15822
15823	/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15824	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15825	/// corresponding mask bit is not set).
15826	///
15827	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15828	///
15829	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
15830	#[inline]
15831	#[target_feature(enable = "avx512fp16")]
15832	#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = `8`))]
15833	#[rustc_legacy_const_generics(`2`)]
15834	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15835	pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
15836	static_assert_sae!(SAE);
15837	_mm512_mask_cvtx_roundph_ps::<SAE>(src:_mm512_setzero_ps(), k, a)
15838	}
15839
15840	/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15841	/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed
15842	/// elements from a to the upper elements of dst.
15843	///
15844	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
15845	#[inline]
15846	#[target_feature(enable = "avx512fp16")]
15847	#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15848	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15849	pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
15850	_mm_mask_cvtsh_ss(src:a, k:`0xff`, a, b)
15851	}
15852
15853	/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15854	/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15855	/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15856	/// upper elements of dst.
15857	///
15858	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
15859	#[inline]
15860	#[target_feature(enable = "avx512fp16")]
15861	#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15862	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15863	pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15864	unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
15865	}
15866
15867	/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15868	/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15869	/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15870	/// of dst.
15871	///
15872	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
15873	#[inline]
15874	#[target_feature(enable = "avx512fp16")]
15875	#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15876	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15877	pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15878	_mm_mask_cvtsh_ss(src:_mm_set_ss(`0.0`), k, a, b)
15879	}
15880
15881	/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15882	/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements
15883	/// from a to the upper elements of dst.
15884	///
15885	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15886	///
15887	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
15888	#[inline]
15889	#[target_feature(enable = "avx512fp16")]
15890	#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = `8`))]
15891	#[rustc_legacy_const_generics(`2`)]
15892	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15893	pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
15894	static_assert_sae!(SAE);
15895	_mm_mask_cvt_roundsh_ss::<SAE>(src:_mm_undefined_ps(), k:`0xff`, a, b)
15896	}
15897
15898	/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15899	/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15900	/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15901	/// upper elements of dst.
15902	///
15903	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15904	///
15905	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
15906	#[inline]
15907	#[target_feature(enable = "avx512fp16")]
15908	#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = `8`))]
15909	#[rustc_legacy_const_generics(`4`)]
15910	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15911	pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
15912	src: __m128,
15913	k: __mmask8,
15914	a: __m128,
15915	b: __m128h,
15916	) -> __m128 {
15917	unsafe {
15918	static_assert_sae!(SAE);
15919	vcvtsh2ss(a, b, src, k, SAE)
15920	}
15921	}
15922
15923	/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15924	/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15925	/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15926	/// of dst.
15927	///
15928	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15929	///
15930	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
15931	#[inline]
15932	#[target_feature(enable = "avx512fp16")]
15933	#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = `8`))]
15934	#[rustc_legacy_const_generics(`3`)]
15935	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15936	pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15937	static_assert_sae!(SAE);
15938	_mm_mask_cvt_roundsh_ss::<SAE>(src:_mm_set_ss(`0.0`), k, a, b)
15939	}
15940
15941	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15942	/// floating-point elements, and store the results in dst.
15943	///
15944	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
15945	#[inline]
15946	#[target_feature(enable = "avx512fp16,avx512vl")]
15947	#[cfg_attr(test, assert_instr(vcvtph2pd))]
15948	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15949	pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
15950	_mm_mask_cvtph_pd(src:_mm_setzero_pd(), k:`0xff`, a)
15951	}
15952
15953	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15954	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15955	/// dst when the corresponding mask bit is not set).
15956	///
15957	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
15958	#[inline]
15959	#[target_feature(enable = "avx512fp16,avx512vl")]
15960	#[cfg_attr(test, assert_instr(vcvtph2pd))]
15961	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15962	pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
15963	unsafe { vcvtph2pd_128(a, src, k) }
15964	}
15965
15966	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15967	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15968	/// corresponding mask bit is not set).
15969	///
15970	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
15971	#[inline]
15972	#[target_feature(enable = "avx512fp16,avx512vl")]
15973	#[cfg_attr(test, assert_instr(vcvtph2pd))]
15974	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15975	pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
15976	_mm_mask_cvtph_pd(src:_mm_setzero_pd(), k, a)
15977	}
15978
15979	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15980	/// floating-point elements, and store the results in dst.
15981	///
15982	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
15983	#[inline]
15984	#[target_feature(enable = "avx512fp16,avx512vl")]
15985	#[cfg_attr(test, assert_instr(vcvtph2pd))]
15986	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15987	pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
15988	_mm256_mask_cvtph_pd(src:_mm256_setzero_pd(), k:`0xff`, a)
15989	}
15990
15991	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15992	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15993	/// dst when the corresponding mask bit is not set).
15994	///
15995	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
15996	#[inline]
15997	#[target_feature(enable = "avx512fp16,avx512vl")]
15998	#[cfg_attr(test, assert_instr(vcvtph2pd))]
15999	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16000	pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
16001	unsafe { vcvtph2pd_256(a, src, k) }
16002	}
16003
16004	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16005	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16006	/// corresponding mask bit is not set).
16007	///
16008	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
16009	#[inline]
16010	#[target_feature(enable = "avx512fp16,avx512vl")]
16011	#[cfg_attr(test, assert_instr(vcvtph2pd))]
16012	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16013	pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
16014	_mm256_mask_cvtph_pd(src:_mm256_setzero_pd(), k, a)
16015	}
16016
16017	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16018	/// floating-point elements, and store the results in dst.
16019	///
16020	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
16021	#[inline]
16022	#[target_feature(enable = "avx512fp16")]
16023	#[cfg_attr(test, assert_instr(vcvtph2pd))]
16024	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16025	pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
16026	_mm512_mask_cvtph_pd(src:_mm512_setzero_pd(), k:`0xff`, a)
16027	}
16028
16029	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16030	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16031	/// dst when the corresponding mask bit is not set).
16032	///
16033	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
16034	#[inline]
16035	#[target_feature(enable = "avx512fp16")]
16036	#[cfg_attr(test, assert_instr(vcvtph2pd))]
16037	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16038	pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
16039	unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
16040	}
16041
16042	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16043	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16044	/// corresponding mask bit is not set).
16045	///
16046	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
16047	#[inline]
16048	#[target_feature(enable = "avx512fp16")]
16049	#[cfg_attr(test, assert_instr(vcvtph2pd))]
16050	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16051	pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
16052	_mm512_mask_cvtph_pd(src:_mm512_setzero_pd(), k, a)
16053	}
16054
16055	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16056	/// floating-point elements, and store the results in dst.
16057	///
16058	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16059	///
16060	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
16061	#[inline]
16062	#[target_feature(enable = "avx512fp16")]
16063	#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = `8`))]
16064	#[rustc_legacy_const_generics(`1`)]
16065	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16066	pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
16067	static_assert_sae!(SAE);
16068	_mm512_mask_cvt_roundph_pd::<SAE>(src:_mm512_setzero_pd(), k:`0xff`, a)
16069	}
16070
16071	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16072	/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16073	/// dst when the corresponding mask bit is not set).
16074	///
16075	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16076	///
16077	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
16078	#[inline]
16079	#[target_feature(enable = "avx512fp16")]
16080	#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = `8`))]
16081	#[rustc_legacy_const_generics(`3`)]
16082	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16083	pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
16084	src: __m512d,
16085	k: __mmask8,
16086	a: __m128h,
16087	) -> __m512d {
16088	unsafe {
16089	static_assert_sae!(SAE);
16090	vcvtph2pd_512(a, src, k, SAE)
16091	}
16092	}
16093
16094	/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16095	/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16096	/// corresponding mask bit is not set).
16097	///
16098	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16099	///
16100	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
16101	#[inline]
16102	#[target_feature(enable = "avx512fp16")]
16103	#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = `8`))]
16104	#[rustc_legacy_const_generics(`2`)]
16105	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16106	pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
16107	static_assert_sae!(SAE);
16108	_mm512_mask_cvt_roundph_pd::<SAE>(src:_mm512_setzero_pd(), k, a)
16109	}
16110
16111	/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16112	/// floating-point element, store the result in the lower element of dst, and copy the upper element
16113	/// from a to the upper element of dst.
16114	///
16115	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
16116	#[inline]
16117	#[target_feature(enable = "avx512fp16")]
16118	#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16119	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16120	pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
16121	_mm_mask_cvtsh_sd(src:a, k:`0xff`, a, b)
16122	}
16123
16124	/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16125	/// floating-point element, store the result in the lower element of dst using writemask k (the element is
16126	/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
16127	/// of dst.
16128	///
16129	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
16130	#[inline]
16131	#[target_feature(enable = "avx512fp16")]
16132	#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16133	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16134	pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16135	unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
16136	}
16137
16138	/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16139	/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16140	/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
16141	///
16142	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
16143	#[inline]
16144	#[target_feature(enable = "avx512fp16")]
16145	#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16146	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16147	pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16148	_mm_mask_cvtsh_sd(src:_mm_set_sd(`0.0`), k, a, b)
16149	}
16150
16151	/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16152	/// floating-point element, store the result in the lower element of dst, and copy the upper element from a
16153	/// to the upper element of dst.
16154	///
16155	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16156	///
16157	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
16158	#[inline]
16159	#[target_feature(enable = "avx512fp16")]
16160	#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = `8`))]
16161	#[rustc_legacy_const_generics(`2`)]
16162	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16163	pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
16164	static_assert_sae!(SAE);
16165	_mm_mask_cvt_roundsh_sd::<SAE>(src:a, k:`0xff`, a, b)
16166	}
16167
16168	/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16169	/// floating-point element, store the result in the lower element of dst using writemask k (the element is
16170	/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
16171	/// of dst.
16172	///
16173	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16174	///
16175	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
16176	#[inline]
16177	#[target_feature(enable = "avx512fp16")]
16178	#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = `8`))]
16179	#[rustc_legacy_const_generics(`4`)]
16180	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16181	pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
16182	src: __m128d,
16183	k: __mmask8,
16184	a: __m128d,
16185	b: __m128h,
16186	) -> __m128d {
16187	unsafe {
16188	static_assert_sae!(SAE);
16189	vcvtsh2sd(a, b, src, k, SAE)
16190	}
16191	}
16192
16193	/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16194	/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16195	/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
16196	///
16197	/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16198	///
16199	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
16200	#[inline]
16201	#[target_feature(enable = "avx512fp16")]
16202	#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = `8`))]
16203	#[rustc_legacy_const_generics(`3`)]
16204	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16205	pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16206	static_assert_sae!(SAE);
16207	_mm_mask_cvt_roundsh_sd::<SAE>(src:_mm_set_sd(`0.0`), k, a, b)
16208	}
16209
16210	/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16211	///
16212	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
16213	#[inline]
16214	#[target_feature(enable = "avx512fp16")]
16215	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16216	pub fn _mm_cvtsh_h(a: __m128h) -> f16 {
16217	unsafe { simd_extract!(a, `0`) }
16218	}
16219
16220	/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16221	///
16222	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
16223	#[inline]
16224	#[target_feature(enable = "avx512fp16")]
16225	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16226	pub fn _mm256_cvtsh_h(a: __m256h) -> f16 {
16227	unsafe { simd_extract!(a, `0`) }
16228	}
16229
16230	/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16231	///
16232	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
16233	#[inline]
16234	#[target_feature(enable = "avx512fp16")]
16235	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16236	pub fn _mm512_cvtsh_h(a: __m512h) -> f16 {
16237	unsafe { simd_extract!(a, `0`) }
16238	}
16239
16240	/// Copy the lower 16-bit integer in a to dst.
16241	///
16242	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
16243	#[inline]
16244	#[target_feature(enable = "avx512fp16")]
16245	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16246	pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
16247	unsafe { simd_extract!(a.as_i16x8(), `0`) }
16248	}
16249
16250	/// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst.
16251	///
16252	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
16253	#[inline]
16254	#[target_feature(enable = "avx512fp16")]
16255	#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16256	pub fn _mm_cvtsi16_si128(a: i16) -> __m128i {
16257	unsafe { transmute(src:simd_insert!(i16x8::ZERO, `0`, a)) }
16258	}
16259
16260	#[allow(improper_ctypes)]
16261	unsafe extern "C" {
16262	#[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
16263	unsafefn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
16264	#[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
16265	unsafefn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
16266
16267	#[link_name = "llvm.x86.avx512fp16.add.ph.512"]
16268	unsafefn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16269	#[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
16270	unsafefn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16271	#[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
16272	unsafefn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16273	#[link_name = "llvm.x86.avx512fp16.div.ph.512"]
16274	unsafefn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16275
16276	#[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
16277	unsafefn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16278	#[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
16279	unsafefn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16280	#[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
16281	unsafefn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16282	#[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
16283	unsafefn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16284
16285	#[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
16286	unsafefn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
16287	#[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
16288	unsafefn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
16289	#[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
16290	unsafefn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
16291	#[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
16292	unsafefn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
16293
16294	#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
16295	unsafefn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
16296	#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
16297	unsafefn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
16298	#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
16299	unsafefn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
16300	#[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
16301	unsafefn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
16302
16303	#[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
16304	unsafefn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16305	#[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
16306	unsafefn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16307	#[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
16308	unsafefn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16309	#[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
16310	unsafefn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16311	#[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
16312	unsafefn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
16313	#[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
16314	unsafefn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
16315	#[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
16316	unsafefn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16317	#[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
16318	unsafefn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16319
16320	#[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
16321	unsafefn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16322	#[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
16323	unsafefn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16324	#[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
16325	unsafefn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16326	#[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
16327	unsafefn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16328	#[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
16329	unsafefn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
16330	-> __m512;
16331	#[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
16332	unsafefn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
16333	-> __m512;
16334	#[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
16335	unsafefn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16336	#[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
16337	unsafefn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16338
16339	#[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
16340	unsafefn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
16341	#[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
16342	unsafefn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
16343
16344	#[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
16345	unsafefn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
16346	#[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
16347	unsafefn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
16348	#[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
16349	unsafefn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
16350
16351	#[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
16352	unsafefn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16353	#[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
16354	unsafefn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16355	#[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
16356	unsafefn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
16357	#[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
16358	unsafefn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16359
16360	#[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
16361	unsafefn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16362	#[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
16363	unsafefn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16364	#[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
16365	unsafefn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
16366	#[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
16367	unsafefn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16368
16369	#[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
16370	unsafefn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
16371	#[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
16372	unsafefn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16373
16374	#[link_name = "llvm.x86.avx512fp16.max.ph.128"]
16375	unsafefn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
16376	#[link_name = "llvm.x86.avx512fp16.max.ph.256"]
16377	unsafefn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
16378	#[link_name = "llvm.x86.avx512fp16.max.ph.512"]
16379	unsafefn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
16380	#[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
16381	unsafefn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
16382
16383	#[link_name = "llvm.x86.avx512fp16.min.ph.128"]
16384	unsafefn vminph_128(a: __m128h, b: __m128h) -> __m128h;
16385	#[link_name = "llvm.x86.avx512fp16.min.ph.256"]
16386	unsafefn vminph_256(a: __m256h, b: __m256h) -> __m256h;
16387	#[link_name = "llvm.x86.avx512fp16.min.ph.512"]
16388	unsafefn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
16389	#[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
16390	unsafefn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
16391
16392	#[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
16393	unsafefn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16394	#[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
16395	unsafefn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16396	#[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
16397	unsafefn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16398	#[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
16399	unsafefn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
16400
16401	#[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
16402	unsafefn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
16403	#[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
16404	unsafefn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
16405	#[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
16406	unsafefn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16407	#[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
16408	unsafefn vgetmantsh(
16409	a: __m128h,
16410	b: __m128h,
16411	imm8: i32,
16412	src: __m128h,
16413	k: __mmask8,
16414	sae: i32,
16415	) -> __m128h;
16416
16417	#[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
16418	unsafefn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
16419	#[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
16420	unsafefn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
16421	#[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
16422	unsafefn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16423	#[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
16424	unsafefn vrndscalesh(
16425	a: __m128h,
16426	b: __m128h,
16427	src: __m128h,
16428	k: __mmask8,
16429	imm8: i32,
16430	sae: i32,
16431	) -> __m128h;
16432
16433	#[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
16434	unsafefn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16435	#[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
16436	unsafefn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16437	#[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
16438	unsafefn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
16439	#[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
16440	unsafefn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16441
16442	#[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
16443	unsafefn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
16444	#[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
16445	unsafefn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
16446	#[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
16447	unsafefn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16448	#[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
16449	unsafefn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
16450	-> __m128h;
16451
16452	#[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
16453	unsafefn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
16454
16455	#[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"]
16456	unsafefn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h;
16457	#[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"]
16458	unsafefn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h;
16459	#[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"]
16460	unsafefn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h;
16461	#[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i16"]
16462	unsafefn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h;
16463	#[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i16"]
16464	unsafefn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h;
16465	#[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32i16"]
16466	unsafefn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h;
16467
16468	#[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"]
16469	unsafefn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h;
16470	#[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"]
16471	unsafefn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h;
16472	#[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"]
16473	unsafefn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h;
16474	#[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"]
16475	unsafefn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h;
16476	#[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"]
16477	unsafefn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h;
16478	#[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i32"]
16479	unsafefn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h;
16480	#[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i32"]
16481	unsafefn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h;
16482	#[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"]
16483	unsafefn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h;
16484
16485	#[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"]
16486	unsafefn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h;
16487	#[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"]
16488	unsafefn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h;
16489	#[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"]
16490	unsafefn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h;
16491	#[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"]
16492	unsafefn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h;
16493	#[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"]
16494	unsafefn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h;
16495	#[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i64"]
16496	unsafefn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h;
16497
16498	#[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128"]
16499	unsafefn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h;
16500	#[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"]
16501	unsafefn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h;
16502	#[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"]
16503	unsafefn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h;
16504	#[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"]
16505	unsafefn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16506
16507	#[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"]
16508	unsafefn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h;
16509	#[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"]
16510	unsafefn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h;
16511	#[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"]
16512	unsafefn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16513	#[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
16514	unsafefn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16515
16516	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"]
16517	unsafefn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
16518	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"]
16519	unsafefn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
16520	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"]
16521	unsafefn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32;
16522	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"]
16523	unsafefn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
16524	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
16525	unsafefn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
16526	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
16527	unsafefn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
16528
16529	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
16530	unsafefn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
16531	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"]
16532	unsafefn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
16533	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"]
16534	unsafefn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32;
16535	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"]
16536	unsafefn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
16537	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"]
16538	unsafefn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
16539	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"]
16540	unsafefn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
16541
16542	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"]
16543	unsafefn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
16544	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"]
16545	unsafefn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
16546	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"]
16547	unsafefn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16;
16548	#[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"]
16549	unsafefn vcvtsh2si32(a: __m128h, rounding: i32) -> i32;
16550	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"]
16551	unsafefn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
16552	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"]
16553	unsafefn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
16554	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"]
16555	unsafefn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16;
16556	#[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"]
16557	unsafefn vcvtsh2usi32(a: __m128h, sae: i32) -> u32;
16558
16559	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"]
16560	unsafefn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
16561	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"]
16562	unsafefn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
16563	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"]
16564	unsafefn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16;
16565	#[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"]
16566	unsafefn vcvttsh2si32(a: __m128h, sae: i32) -> i32;
16567	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"]
16568	unsafefn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
16569	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"]
16570	unsafefn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
16571	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"]
16572	unsafefn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16;
16573	#[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"]
16574	unsafefn vcvttsh2usi32(a: __m128h, sae: i32) -> u32;
16575
16576	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"]
16577	unsafefn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
16578	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"]
16579	unsafefn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
16580	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"]
16581	unsafefn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
16582	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"]
16583	unsafefn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
16584	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"]
16585	unsafefn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
16586	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"]
16587	unsafefn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
16588
16589	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"]
16590	unsafefn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
16591	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"]
16592	unsafefn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
16593	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"]
16594	unsafefn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
16595	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"]
16596	unsafefn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
16597	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256"]
16598	unsafefn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
16599	#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"]
16600	unsafefn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
16601
16602	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"]
16603	unsafefn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128;
16604	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"]
16605	unsafefn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256;
16606	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"]
16607	unsafefn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512;
16608	#[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"]
16609	unsafefn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128;
16610
16611	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"]
16612	unsafefn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d;
16613	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"]
16614	unsafefn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d;
16615	#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"]
16616	unsafefn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d;
16617	#[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"]
16618	unsafefn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d;
16619
16620	}
16621
16622	#[cfg(test)]
16623	mod tests {
16624	use crate::core_arch::x86::*;
16625	use crate::mem::transmute;
16626	use crate::ptr::{addr_of, addr_of_mut};
16627	use stdarch_test::simd_test;
16628
16629	#[target_feature(enable = "avx512fp16")]
16630	unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
16631	_mm_setr_ph(re, im, re, im, re, im, re, im)
16632	}
16633
16634	#[target_feature(enable = "avx512fp16")]
16635	unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
16636	_mm256_setr_ph(
16637	re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16638	)
16639	}
16640
16641	#[target_feature(enable = "avx512fp16")]
16642	unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
16643	_mm512_setr_ph(
16644	re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16645	re, im, re, im, re, im, re, im, re, im,
16646	)
16647	}
16648
16649	#[simd_test(enable = "avx512fp16,avx512vl")]
16650	unsafe fn test_mm_set_ph() {
16651	let r = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
16652	let e = _mm_setr_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
16653	assert_eq_m128h(r, e);
16654	}
16655
16656	#[simd_test(enable = "avx512fp16,avx512vl")]
16657	unsafe fn test_mm256_set_ph() {
16658	let r = _mm256_set_ph(
16659	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
16660	);
16661	let e = _mm256_setr_ph(
16662	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
16663	);
16664	assert_eq_m256h(r, e);
16665	}
16666
16667	#[simd_test(enable = "avx512fp16")]
16668	unsafe fn test_mm512_set_ph() {
16669	let r = _mm512_set_ph(
16670	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
16671	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
16672	`31.0`, `32.0`,
16673	);
16674	let e = _mm512_setr_ph(
16675	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
16676	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
16677	`3.0`, `2.0`, `1.0`,
16678	);
16679	assert_eq_m512h(r, e);
16680	}
16681
16682	#[simd_test(enable = "avx512fp16,avx512vl")]
16683	unsafe fn test_mm_set_sh() {
16684	let r = _mm_set_sh(`1.0`);
16685	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`);
16686	assert_eq_m128h(r, e);
16687	}
16688
16689	#[simd_test(enable = "avx512fp16,avx512vl")]
16690	unsafe fn test_mm_set1_ph() {
16691	let r = _mm_set1_ph(`1.0`);
16692	let e = _mm_set_ph(`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`);
16693	assert_eq_m128h(r, e);
16694	}
16695
16696	#[simd_test(enable = "avx512fp16,avx512vl")]
16697	unsafe fn test_mm256_set1_ph() {
16698	let r = _mm256_set1_ph(`1.0`);
16699	let e = _mm256_set_ph(
16700	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
16701	);
16702	assert_eq_m256h(r, e);
16703	}
16704
16705	#[simd_test(enable = "avx512fp16")]
16706	unsafe fn test_mm512_set1_ph() {
16707	let r = _mm512_set1_ph(`1.0`);
16708	let e = _mm512_set_ph(
16709	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
16710	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
16711	);
16712	assert_eq_m512h(r, e);
16713	}
16714
16715	#[simd_test(enable = "avx512fp16,avx512vl")]
16716	unsafe fn test_mm_setr_ph() {
16717	let r = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
16718	let e = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
16719	assert_eq_m128h(r, e);
16720	}
16721
16722	#[simd_test(enable = "avx512fp16,avx512vl")]
16723	unsafe fn test_mm256_setr_ph() {
16724	let r = _mm256_setr_ph(
16725	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
16726	);
16727	let e = _mm256_set_ph(
16728	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
16729	);
16730	assert_eq_m256h(r, e);
16731	}
16732
16733	#[simd_test(enable = "avx512fp16")]
16734	unsafe fn test_mm512_setr_ph() {
16735	let r = _mm512_setr_ph(
16736	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
16737	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
16738	`31.0`, `32.0`,
16739	);
16740	let e = _mm512_set_ph(
16741	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
16742	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
16743	`3.0`, `2.0`, `1.0`,
16744	);
16745	assert_eq_m512h(r, e);
16746	}
16747
16748	#[simd_test(enable = "avx512fp16,avx512vl")]
16749	unsafe fn test_mm_setzero_ph() {
16750	let r = _mm_setzero_ph();
16751	let e = _mm_set1_ph(`0.0`);
16752	assert_eq_m128h(r, e);
16753	}
16754
16755	#[simd_test(enable = "avx512fp16,avx512vl")]
16756	unsafe fn test_mm256_setzero_ph() {
16757	let r = _mm256_setzero_ph();
16758	let e = _mm256_set1_ph(`0.0`);
16759	assert_eq_m256h(r, e);
16760	}
16761
16762	#[simd_test(enable = "avx512fp16")]
16763	unsafe fn test_mm512_setzero_ph() {
16764	let r = _mm512_setzero_ph();
16765	let e = _mm512_set1_ph(`0.0`);
16766	assert_eq_m512h(r, e);
16767	}
16768
16769	#[simd_test(enable = "avx512fp16,avx512vl")]
16770	unsafe fn test_mm_castsi128_ph() {
16771	let a = _mm_set1_epi16(`0x3c00`);
16772	let r = _mm_castsi128_ph(a);
16773	let e = _mm_set1_ph(`1.0`);
16774	assert_eq_m128h(r, e);
16775	}
16776
16777	#[simd_test(enable = "avx512fp16,avx512vl")]
16778	unsafe fn test_mm256_castsi256_ph() {
16779	let a = _mm256_set1_epi16(`0x3c00`);
16780	let r = _mm256_castsi256_ph(a);
16781	let e = _mm256_set1_ph(`1.0`);
16782	assert_eq_m256h(r, e);
16783	}
16784
16785	#[simd_test(enable = "avx512fp16")]
16786	unsafe fn test_mm512_castsi512_ph() {
16787	let a = _mm512_set1_epi16(`0x3c00`);
16788	let r = _mm512_castsi512_ph(a);
16789	let e = _mm512_set1_ph(`1.0`);
16790	assert_eq_m512h(r, e);
16791	}
16792
16793	#[simd_test(enable = "avx512fp16")]
16794	unsafe fn test_mm_castph_si128() {
16795	let a = _mm_set1_ph(`1.0`);
16796	let r = _mm_castph_si128(a);
16797	let e = _mm_set1_epi16(`0x3c00`);
16798	assert_eq_m128i(r, e);
16799	}
16800
16801	#[simd_test(enable = "avx512fp16")]
16802	unsafe fn test_mm256_castph_si256() {
16803	let a = _mm256_set1_ph(`1.0`);
16804	let r = _mm256_castph_si256(a);
16805	let e = _mm256_set1_epi16(`0x3c00`);
16806	assert_eq_m256i(r, e);
16807	}
16808
16809	#[simd_test(enable = "avx512fp16")]
16810	unsafe fn test_mm512_castph_si512() {
16811	let a = _mm512_set1_ph(`1.0`);
16812	let r = _mm512_castph_si512(a);
16813	let e = _mm512_set1_epi16(`0x3c00`);
16814	assert_eq_m512i(r, e);
16815	}
16816
16817	#[simd_test(enable = "avx512fp16,avx512vl")]
16818	unsafe fn test_mm_castps_ph() {
16819	let a = _mm_castsi128_ps(_mm_set1_epi16(`0x3c00`));
16820	let r = _mm_castps_ph(a);
16821	let e = _mm_set1_ph(`1.0`);
16822	assert_eq_m128h(r, e);
16823	}
16824
16825	#[simd_test(enable = "avx512fp16,avx512vl")]
16826	unsafe fn test_mm256_castps_ph() {
16827	let a = _mm256_castsi256_ps(_mm256_set1_epi16(`0x3c00`));
16828	let r = _mm256_castps_ph(a);
16829	let e = _mm256_set1_ph(`1.0`);
16830	assert_eq_m256h(r, e);
16831	}
16832
16833	#[simd_test(enable = "avx512fp16")]
16834	unsafe fn test_mm512_castps_ph() {
16835	let a = _mm512_castsi512_ps(_mm512_set1_epi16(`0x3c00`));
16836	let r = _mm512_castps_ph(a);
16837	let e = _mm512_set1_ph(`1.0`);
16838	assert_eq_m512h(r, e);
16839	}
16840
16841	#[simd_test(enable = "avx512fp16")]
16842	unsafe fn test_mm_castph_ps() {
16843	let a = _mm_castsi128_ph(_mm_set1_epi32(`0x3f800000`));
16844	let r = _mm_castph_ps(a);
16845	let e = _mm_set1_ps(`1.0`);
16846	assert_eq_m128(r, e);
16847	}
16848
16849	#[simd_test(enable = "avx512fp16")]
16850	unsafe fn test_mm256_castph_ps() {
16851	let a = _mm256_castsi256_ph(_mm256_set1_epi32(`0x3f800000`));
16852	let r = _mm256_castph_ps(a);
16853	let e = _mm256_set1_ps(`1.0`);
16854	assert_eq_m256(r, e);
16855	}
16856
16857	#[simd_test(enable = "avx512fp16")]
16858	unsafe fn test_mm512_castph_ps() {
16859	let a = _mm512_castsi512_ph(_mm512_set1_epi32(`0x3f800000`));
16860	let r = _mm512_castph_ps(a);
16861	let e = _mm512_set1_ps(`1.0`);
16862	assert_eq_m512(r, e);
16863	}
16864
16865	#[simd_test(enable = "avx512fp16,avx512vl")]
16866	unsafe fn test_mm_castpd_ph() {
16867	let a = _mm_castsi128_pd(_mm_set1_epi16(`0x3c00`));
16868	let r = _mm_castpd_ph(a);
16869	let e = _mm_set1_ph(`1.0`);
16870	assert_eq_m128h(r, e);
16871	}
16872
16873	#[simd_test(enable = "avx512fp16,avx512vl")]
16874	unsafe fn test_mm256_castpd_ph() {
16875	let a = _mm256_castsi256_pd(_mm256_set1_epi16(`0x3c00`));
16876	let r = _mm256_castpd_ph(a);
16877	let e = _mm256_set1_ph(`1.0`);
16878	assert_eq_m256h(r, e);
16879	}
16880
16881	#[simd_test(enable = "avx512fp16")]
16882	unsafe fn test_mm512_castpd_ph() {
16883	let a = _mm512_castsi512_pd(_mm512_set1_epi16(`0x3c00`));
16884	let r = _mm512_castpd_ph(a);
16885	let e = _mm512_set1_ph(`1.0`);
16886	assert_eq_m512h(r, e);
16887	}
16888
16889	#[simd_test(enable = "avx512fp16")]
16890	unsafe fn test_mm_castph_pd() {
16891	let a = _mm_castsi128_ph(_mm_set1_epi64x(`0x3ff0000000000000`));
16892	let r = _mm_castph_pd(a);
16893	let e = _mm_set1_pd(`1.0`);
16894	assert_eq_m128d(r, e);
16895	}
16896
16897	#[simd_test(enable = "avx512fp16")]
16898	unsafe fn test_mm256_castph_pd() {
16899	let a = _mm256_castsi256_ph(_mm256_set1_epi64x(`0x3ff0000000000000`));
16900	let r = _mm256_castph_pd(a);
16901	let e = _mm256_set1_pd(`1.0`);
16902	assert_eq_m256d(r, e);
16903	}
16904
16905	#[simd_test(enable = "avx512fp16")]
16906	unsafe fn test_mm512_castph_pd() {
16907	let a = _mm512_castsi512_ph(_mm512_set1_epi64(`0x3ff0000000000000`));
16908	let r = _mm512_castph_pd(a);
16909	let e = _mm512_set1_pd(`1.0`);
16910	assert_eq_m512d(r, e);
16911	}
16912
16913	#[simd_test(enable = "avx512fp16,avx512vl")]
16914	unsafe fn test_mm256_castph256_ph128() {
16915	let a = _mm256_setr_ph(
16916	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`,
16917	);
16918	let r = _mm256_castph256_ph128(a);
16919	let e = _mm_setr_ph(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
16920	assert_eq_m128h(r, e);
16921	}
16922
16923	#[simd_test(enable = "avx512fp16,avx512vl")]
16924	unsafe fn test_mm512_castph512_ph128() {
16925	let a = _mm512_setr_ph(
16926	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`,
16927	`20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`, `28.`, `29.`, `30.`, `31.`, `32.`,
16928	);
16929	let r = _mm512_castph512_ph128(a);
16930	let e = _mm_setr_ph(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
16931	assert_eq_m128h(r, e);
16932	}
16933
16934	#[simd_test(enable = "avx512fp16,avx512vl")]
16935	unsafe fn test_mm512_castph512_ph256() {
16936	let a = _mm512_setr_ph(
16937	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`,
16938	`20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`, `28.`, `29.`, `30.`, `31.`, `32.`,
16939	);
16940	let r = _mm512_castph512_ph256(a);
16941	let e = _mm256_setr_ph(
16942	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`,
16943	);
16944	assert_eq_m256h(r, e);
16945	}
16946
16947	#[simd_test(enable = "avx512fp16,avx512vl")]
16948	unsafe fn test_mm256_castph128_ph256() {
16949	let a = _mm_setr_ph(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
16950	let r = _mm256_castph128_ph256(a);
16951	assert_eq_m128h(_mm256_castph256_ph128(r), a);
16952	}
16953
16954	#[simd_test(enable = "avx512fp16,avx512vl")]
16955	unsafe fn test_mm512_castph128_ph512() {
16956	let a = _mm_setr_ph(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
16957	let r = _mm512_castph128_ph512(a);
16958	assert_eq_m128h(_mm512_castph512_ph128(r), a);
16959	}
16960
16961	#[simd_test(enable = "avx512fp16,avx512vl")]
16962	unsafe fn test_mm512_castph256_ph512() {
16963	let a = _mm256_setr_ph(
16964	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`,
16965	);
16966	let r = _mm512_castph256_ph512(a);
16967	assert_eq_m256h(_mm512_castph512_ph256(r), a);
16968	}
16969
16970	#[simd_test(enable = "avx512fp16,avx512vl")]
16971	unsafe fn test_mm256_zextph128_ph256() {
16972	let a = _mm_setr_ph(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
16973	let r = _mm256_zextph128_ph256(a);
16974	let e = _mm256_setr_ph(
16975	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`,
16976	);
16977	assert_eq_m256h(r, e);
16978	}
16979
16980	#[simd_test(enable = "avx512fp16")]
16981	unsafe fn test_mm512_zextph128_ph512() {
16982	let a = _mm_setr_ph(`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`);
16983	let r = _mm512_zextph128_ph512(a);
16984	let e = _mm512_setr_ph(
16985	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`,
16986	`0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`,
16987	);
16988	assert_eq_m512h(r, e);
16989	}
16990
16991	#[simd_test(enable = "avx512fp16")]
16992	unsafe fn test_mm512_zextph256_ph512() {
16993	let a = _mm256_setr_ph(
16994	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`,
16995	);
16996	let r = _mm512_zextph256_ph512(a);
16997	let e = _mm512_setr_ph(
16998	`1.`, `2.`, `3.`, `4.`, `5.`, `6.`, `7.`, `8.`, `9.`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `0.`, `0.`, `0.`, `0.`,
16999	`0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`, `0.`,
17000	);
17001	assert_eq_m512h(r, e);
17002	}
17003
17004	#[simd_test(enable = "avx512fp16,avx512vl")]
17005	unsafe fn test_mm_cmp_ph_mask() {
17006	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17007	let b = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`);
17008	let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17009	assert_eq!(r, `0b11110000`);
17010	}
17011
17012	#[simd_test(enable = "avx512fp16,avx512vl")]
17013	unsafe fn test_mm_mask_cmp_ph_mask() {
17014	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17015	let b = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`);
17016	let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(`0b01010101`, a, b);
17017	assert_eq!(r, `0b01010000`);
17018	}
17019
17020	#[simd_test(enable = "avx512fp16,avx512vl")]
17021	unsafe fn test_mm256_cmp_ph_mask() {
17022	let a = _mm256_set_ph(
17023	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17024	);
17025	let b = _mm256_set_ph(
17026	`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `-13.0`, `-14.0`, `-15.0`,
17027	`-16.0`,
17028	);
17029	let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17030	assert_eq!(r, `0b1111000011110000`);
17031	}
17032
17033	#[simd_test(enable = "avx512fp16,avx512vl")]
17034	unsafe fn test_mm256_mask_cmp_ph_mask() {
17035	let a = _mm256_set_ph(
17036	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17037	);
17038	let b = _mm256_set_ph(
17039	`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `-13.0`, `-14.0`, `-15.0`,
17040	`-16.0`,
17041	);
17042	let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(`0b0101010101010101`, a, b);
17043	assert_eq!(r, `0b0101000001010000`);
17044	}
17045
17046	#[simd_test(enable = "avx512fp16")]
17047	unsafe fn test_mm512_cmp_ph_mask() {
17048	let a = _mm512_set_ph(
17049	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17050	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17051	`31.0`, `32.0`,
17052	);
17053	let b = _mm512_set_ph(
17054	`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `-13.0`, `-14.0`, `-15.0`,
17055	`-16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `-21.0`, `-22.0`, `-23.0`, `-24.0`, `25.0`, `26.0`, `27.0`, `28.0`,
17056	`-29.0`, `-30.0`, `-31.0`, `-32.0`,
17057	);
17058	let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17059	assert_eq!(r, `0b11110000111100001111000011110000`);
17060	}
17061
17062	#[simd_test(enable = "avx512fp16")]
17063	unsafe fn test_mm512_mask_cmp_ph_mask() {
17064	let a = _mm512_set_ph(
17065	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17066	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17067	`31.0`, `32.0`,
17068	);
17069	let b = _mm512_set_ph(
17070	`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `-13.0`, `-14.0`, `-15.0`,
17071	`-16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `-21.0`, `-22.0`, `-23.0`, `-24.0`, `25.0`, `26.0`, `27.0`, `28.0`,
17072	`-29.0`, `-30.0`, `-31.0`, `-32.0`,
17073	);
17074	let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(`0b01010101010101010101010101010101`, a, b);
17075	assert_eq!(r, `0b01010000010100000101000001010000`);
17076	}
17077
17078	#[simd_test(enable = "avx512fp16")]
17079	unsafe fn test_mm512_cmp_round_ph_mask() {
17080	let a = _mm512_set_ph(
17081	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17082	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17083	`31.0`, `32.0`,
17084	);
17085	let b = _mm512_set_ph(
17086	`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `-13.0`, `-14.0`, `-15.0`,
17087	`-16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `-21.0`, `-22.0`, `-23.0`, `-24.0`, `25.0`, `26.0`, `27.0`, `28.0`,
17088	`-29.0`, `-30.0`, `-31.0`, `-32.0`,
17089	);
17090	let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17091	assert_eq!(r, `0b11110000111100001111000011110000`);
17092	}
17093
17094	#[simd_test(enable = "avx512fp16")]
17095	unsafe fn test_mm512_mask_cmp_round_ph_mask() {
17096	let a = _mm512_set_ph(
17097	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17098	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17099	`31.0`, `32.0`,
17100	);
17101	let b = _mm512_set_ph(
17102	`1.0`, `2.0`, `3.0`, `4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `-13.0`, `-14.0`, `-15.0`,
17103	`-16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `-21.0`, `-22.0`, `-23.0`, `-24.0`, `25.0`, `26.0`, `27.0`, `28.0`,
17104	`-29.0`, `-30.0`, `-31.0`, `-32.0`,
17105	);
17106	let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(
17107	`0b01010101010101010101010101010101`,
17108	a,
17109	b,
17110	);
17111	assert_eq!(r, `0b01010000010100000101000001010000`);
17112	}
17113
17114	#[simd_test(enable = "avx512fp16")]
17115	unsafe fn test_mm_cmp_round_sh_mask() {
17116	let a = _mm_set_sh(`1.0`);
17117	let b = _mm_set_sh(`1.0`);
17118	let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17119	assert_eq!(r, `1`);
17120	}
17121
17122	#[simd_test(enable = "avx512fp16")]
17123	unsafe fn test_mm_mask_cmp_round_sh_mask() {
17124	let a = _mm_set_sh(`1.0`);
17125	let b = _mm_set_sh(`1.0`);
17126	let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(`0`, a, b);
17127	assert_eq!(r, `0`);
17128	}
17129
17130	#[simd_test(enable = "avx512fp16")]
17131	unsafe fn test_mm_cmp_sh_mask() {
17132	let a = _mm_set_sh(`1.0`);
17133	let b = _mm_set_sh(`1.0`);
17134	let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
17135	assert_eq!(r, `1`);
17136	}
17137
17138	#[simd_test(enable = "avx512fp16")]
17139	unsafe fn test_mm_mask_cmp_sh_mask() {
17140	let a = _mm_set_sh(`1.0`);
17141	let b = _mm_set_sh(`1.0`);
17142	let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(`0`, a, b);
17143	assert_eq!(r, `0`);
17144	}
17145
17146	#[simd_test(enable = "avx512fp16")]
17147	unsafe fn test_mm_comi_round_sh() {
17148	let a = _mm_set_sh(`1.0`);
17149	let b = _mm_set_sh(`1.0`);
17150	let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17151	assert_eq!(r, `1`);
17152	}
17153
17154	#[simd_test(enable = "avx512fp16")]
17155	unsafe fn test_mm_comi_sh() {
17156	let a = _mm_set_sh(`1.0`);
17157	let b = _mm_set_sh(`1.0`);
17158	let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
17159	assert_eq!(r, `1`);
17160	}
17161
17162	#[simd_test(enable = "avx512fp16")]
17163	unsafe fn test_mm_comieq_sh() {
17164	let a = _mm_set_sh(`1.0`);
17165	let b = _mm_set_sh(`1.0`);
17166	let r = _mm_comieq_sh(a, b);
17167	assert_eq!(r, `1`);
17168	}
17169
17170	#[simd_test(enable = "avx512fp16")]
17171	unsafe fn test_mm_comige_sh() {
17172	let a = _mm_set_sh(`2.0`);
17173	let b = _mm_set_sh(`1.0`);
17174	let r = _mm_comige_sh(a, b);
17175	assert_eq!(r, `1`);
17176	}
17177
17178	#[simd_test(enable = "avx512fp16")]
17179	unsafe fn test_mm_comigt_sh() {
17180	let a = _mm_set_sh(`2.0`);
17181	let b = _mm_set_sh(`1.0`);
17182	let r = _mm_comigt_sh(a, b);
17183	assert_eq!(r, `1`);
17184	}
17185
17186	#[simd_test(enable = "avx512fp16")]
17187	unsafe fn test_mm_comile_sh() {
17188	let a = _mm_set_sh(`1.0`);
17189	let b = _mm_set_sh(`2.0`);
17190	let r = _mm_comile_sh(a, b);
17191	assert_eq!(r, `1`);
17192	}
17193
17194	#[simd_test(enable = "avx512fp16")]
17195	unsafe fn test_mm_comilt_sh() {
17196	let a = _mm_set_sh(`1.0`);
17197	let b = _mm_set_sh(`2.0`);
17198	let r = _mm_comilt_sh(a, b);
17199	assert_eq!(r, `1`);
17200	}
17201
17202	#[simd_test(enable = "avx512fp16")]
17203	unsafe fn test_mm_comineq_sh() {
17204	let a = _mm_set_sh(`1.0`);
17205	let b = _mm_set_sh(`2.0`);
17206	let r = _mm_comineq_sh(a, b);
17207	assert_eq!(r, `1`);
17208	}
17209
17210	#[simd_test(enable = "avx512fp16")]
17211	unsafe fn test_mm_ucomieq_sh() {
17212	let a = _mm_set_sh(`1.0`);
17213	let b = _mm_set_sh(`1.0`);
17214	let r = _mm_ucomieq_sh(a, b);
17215	assert_eq!(r, `1`);
17216	}
17217
17218	#[simd_test(enable = "avx512fp16")]
17219	unsafe fn test_mm_ucomige_sh() {
17220	let a = _mm_set_sh(`2.0`);
17221	let b = _mm_set_sh(`1.0`);
17222	let r = _mm_ucomige_sh(a, b);
17223	assert_eq!(r, `1`);
17224	}
17225
17226	#[simd_test(enable = "avx512fp16")]
17227	unsafe fn test_mm_ucomigt_sh() {
17228	let a = _mm_set_sh(`2.0`);
17229	let b = _mm_set_sh(`1.0`);
17230	let r = _mm_ucomigt_sh(a, b);
17231	assert_eq!(r, `1`);
17232	}
17233
17234	#[simd_test(enable = "avx512fp16")]
17235	unsafe fn test_mm_ucomile_sh() {
17236	let a = _mm_set_sh(`1.0`);
17237	let b = _mm_set_sh(`2.0`);
17238	let r = _mm_ucomile_sh(a, b);
17239	assert_eq!(r, `1`);
17240	}
17241
17242	#[simd_test(enable = "avx512fp16")]
17243	unsafe fn test_mm_ucomilt_sh() {
17244	let a = _mm_set_sh(`1.0`);
17245	let b = _mm_set_sh(`2.0`);
17246	let r = _mm_ucomilt_sh(a, b);
17247	assert_eq!(r, `1`);
17248	}
17249
17250	#[simd_test(enable = "avx512fp16")]
17251	unsafe fn test_mm_ucomineq_sh() {
17252	let a = _mm_set_sh(`1.0`);
17253	let b = _mm_set_sh(`2.0`);
17254	let r = _mm_ucomineq_sh(a, b);
17255	assert_eq!(r, `1`);
17256	}
17257
17258	#[simd_test(enable = "avx512fp16,avx512vl")]
17259	unsafe fn test_mm_load_ph() {
17260	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17261	let b = _mm_load_ph(addr_of!(a).cast());
17262	assert_eq_m128h(a, b);
17263	}
17264
17265	#[simd_test(enable = "avx512fp16,avx512vl")]
17266	unsafe fn test_mm256_load_ph() {
17267	let a = _mm256_set_ph(
17268	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17269	);
17270	let b = _mm256_load_ph(addr_of!(a).cast());
17271	assert_eq_m256h(a, b);
17272	}
17273
17274	#[simd_test(enable = "avx512fp16")]
17275	unsafe fn test_mm512_load_ph() {
17276	let a = _mm512_set_ph(
17277	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17278	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17279	`31.0`, `32.0`,
17280	);
17281	let b = _mm512_load_ph(addr_of!(a).cast());
17282	assert_eq_m512h(a, b);
17283	}
17284
17285	#[simd_test(enable = "avx512fp16")]
17286	unsafe fn test_mm_load_sh() {
17287	let a = _mm_set_sh(`1.0`);
17288	let b = _mm_load_sh(addr_of!(a).cast());
17289	assert_eq_m128h(a, b);
17290	}
17291
17292	#[simd_test(enable = "avx512fp16")]
17293	unsafe fn test_mm_mask_load_sh() {
17294	let a = _mm_set_sh(`1.0`);
17295	let src = _mm_set_sh(`2.`);
17296	let b = _mm_mask_load_sh(src, `1`, addr_of!(a).cast());
17297	assert_eq_m128h(a, b);
17298	let b = _mm_mask_load_sh(src, `0`, addr_of!(a).cast());
17299	assert_eq_m128h(src, b);
17300	}
17301
17302	#[simd_test(enable = "avx512fp16")]
17303	unsafe fn test_mm_maskz_load_sh() {
17304	let a = _mm_set_sh(`1.0`);
17305	let b = _mm_maskz_load_sh(`1`, addr_of!(a).cast());
17306	assert_eq_m128h(a, b);
17307	let b = _mm_maskz_load_sh(`0`, addr_of!(a).cast());
17308	assert_eq_m128h(_mm_setzero_ph(), b);
17309	}
17310
17311	#[simd_test(enable = "avx512fp16,avx512vl")]
17312	unsafe fn test_mm_loadu_ph() {
17313	let array = [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`];
17314	let r = _mm_loadu_ph(array.as_ptr());
17315	let e = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17316	assert_eq_m128h(r, e);
17317	}
17318
17319	#[simd_test(enable = "avx512fp16,avx512vl")]
17320	unsafe fn test_mm256_loadu_ph() {
17321	let array = [
17322	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17323	];
17324	let r = _mm256_loadu_ph(array.as_ptr());
17325	let e = _mm256_setr_ph(
17326	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17327	);
17328	assert_eq_m256h(r, e);
17329	}
17330
17331	#[simd_test(enable = "avx512fp16")]
17332	unsafe fn test_mm512_loadu_ph() {
17333	let array = [
17334	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17335	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17336	`31.0`, `32.0`,
17337	];
17338	let r = _mm512_loadu_ph(array.as_ptr());
17339	let e = _mm512_setr_ph(
17340	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17341	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17342	`31.0`, `32.0`,
17343	);
17344	assert_eq_m512h(r, e);
17345	}
17346
17347	#[simd_test(enable = "avx512fp16")]
17348	unsafe fn test_mm_move_sh() {
17349	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17350	let b = _mm_set_sh(`9.0`);
17351	let r = _mm_move_sh(a, b);
17352	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `9.0`);
17353	assert_eq_m128h(r, e);
17354	}
17355
17356	#[simd_test(enable = "avx512fp16")]
17357	unsafe fn test_mm_mask_move_sh() {
17358	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17359	let b = _mm_set_sh(`9.0`);
17360	let src = _mm_set_sh(`10.0`);
17361	let r = _mm_mask_move_sh(src, `0`, a, b);
17362	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `10.0`);
17363	assert_eq_m128h(r, e);
17364	}
17365
17366	#[simd_test(enable = "avx512fp16")]
17367	unsafe fn test_mm_maskz_move_sh() {
17368	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17369	let b = _mm_set_sh(`9.0`);
17370	let r = _mm_maskz_move_sh(`0`, a, b);
17371	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `0.0`);
17372	assert_eq_m128h(r, e);
17373	}
17374
17375	#[simd_test(enable = "avx512fp16,avx512vl")]
17376	unsafe fn test_mm_store_ph() {
17377	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17378	let mut b = _mm_setzero_ph();
17379	_mm_store_ph(addr_of_mut!(b).cast(), a);
17380	assert_eq_m128h(a, b);
17381	}
17382
17383	#[simd_test(enable = "avx512fp16,avx512vl")]
17384	unsafe fn test_mm256_store_ph() {
17385	let a = _mm256_set_ph(
17386	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17387	);
17388	let mut b = _mm256_setzero_ph();
17389	_mm256_store_ph(addr_of_mut!(b).cast(), a);
17390	assert_eq_m256h(a, b);
17391	}
17392
17393	#[simd_test(enable = "avx512fp16")]
17394	unsafe fn test_mm512_store_ph() {
17395	let a = _mm512_set_ph(
17396	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17397	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17398	`31.0`, `32.0`,
17399	);
17400	let mut b = _mm512_setzero_ph();
17401	_mm512_store_ph(addr_of_mut!(b).cast(), a);
17402	assert_eq_m512h(a, b);
17403	}
17404
17405	#[simd_test(enable = "avx512fp16")]
17406	unsafe fn test_mm_store_sh() {
17407	let a = _mm_set_sh(`1.0`);
17408	let mut b = _mm_setzero_ph();
17409	_mm_store_sh(addr_of_mut!(b).cast(), a);
17410	assert_eq_m128h(a, b);
17411	}
17412
17413	#[simd_test(enable = "avx512fp16")]
17414	unsafe fn test_mm_mask_store_sh() {
17415	let a = _mm_set_sh(`1.0`);
17416	let mut b = _mm_setzero_ph();
17417	_mm_mask_store_sh(addr_of_mut!(b).cast(), `0`, a);
17418	assert_eq_m128h(_mm_setzero_ph(), b);
17419	_mm_mask_store_sh(addr_of_mut!(b).cast(), `1`, a);
17420	assert_eq_m128h(a, b);
17421	}
17422
17423	#[simd_test(enable = "avx512fp16,avx512vl")]
17424	unsafe fn test_mm_storeu_ph() {
17425	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17426	let mut array = [`0.0`; `8`];
17427	_mm_storeu_ph(array.as_mut_ptr(), a);
17428	assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
17429	}
17430
17431	#[simd_test(enable = "avx512fp16,avx512vl")]
17432	unsafe fn test_mm256_storeu_ph() {
17433	let a = _mm256_set_ph(
17434	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17435	);
17436	let mut array = [`0.0`; `16`];
17437	_mm256_storeu_ph(array.as_mut_ptr(), a);
17438	assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
17439	}
17440
17441	#[simd_test(enable = "avx512fp16")]
17442	unsafe fn test_mm512_storeu_ph() {
17443	let a = _mm512_set_ph(
17444	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17445	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17446	`31.0`, `32.0`,
17447	);
17448	let mut array = [`0.0`; `32`];
17449	_mm512_storeu_ph(array.as_mut_ptr(), a);
17450	assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
17451	}
17452
17453	#[simd_test(enable = "avx512fp16,avx512vl")]
17454	unsafe fn test_mm_add_ph() {
17455	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17456	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
17457	let r = _mm_add_ph(a, b);
17458	let e = _mm_set1_ph(`9.0`);
17459	assert_eq_m128h(r, e);
17460	}
17461
17462	#[simd_test(enable = "avx512fp16,avx512vl")]
17463	unsafe fn test_mm_mask_add_ph() {
17464	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17465	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
17466	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
17467	let r = _mm_mask_add_ph(src, `0b01010101`, a, b);
17468	let e = _mm_set_ph(`10.`, `9.`, `12.`, `9.`, `14.`, `9.`, `16.`, `9.`);
17469	assert_eq_m128h(r, e);
17470	}
17471
17472	#[simd_test(enable = "avx512fp16,avx512vl")]
17473	unsafe fn test_mm_maskz_add_ph() {
17474	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17475	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
17476	let r = _mm_maskz_add_ph(`0b01010101`, a, b);
17477	let e = _mm_set_ph(`0.`, `9.`, `0.`, `9.`, `0.`, `9.`, `0.`, `9.`);
17478	assert_eq_m128h(r, e);
17479	}
17480
17481	#[simd_test(enable = "avx512fp16,avx512vl")]
17482	unsafe fn test_mm256_add_ph() {
17483	let a = _mm256_set_ph(
17484	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17485	);
17486	let b = _mm256_set_ph(
17487	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
17488	);
17489	let r = _mm256_add_ph(a, b);
17490	let e = _mm256_set1_ph(`17.0`);
17491	assert_eq_m256h(r, e);
17492	}
17493
17494	#[simd_test(enable = "avx512fp16,avx512vl")]
17495	unsafe fn test_mm256_mask_add_ph() {
17496	let a = _mm256_set_ph(
17497	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17498	);
17499	let b = _mm256_set_ph(
17500	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
17501	);
17502	let src = _mm256_set_ph(
17503	`18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`,
17504	);
17505	let r = _mm256_mask_add_ph(src, `0b0101010101010101`, a, b);
17506	let e = _mm256_set_ph(
17507	`18.`, `17.`, `20.`, `17.`, `22.`, `17.`, `24.`, `17.`, `26.`, `17.`, `28.`, `17.`, `30.`, `17.`, `32.`, `17.`,
17508	);
17509	assert_eq_m256h(r, e);
17510	}
17511
17512	#[simd_test(enable = "avx512fp16,avx512vl")]
17513	unsafe fn test_mm256_maskz_add_ph() {
17514	let a = _mm256_set_ph(
17515	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17516	);
17517	let b = _mm256_set_ph(
17518	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
17519	);
17520	let r = _mm256_maskz_add_ph(`0b0101010101010101`, a, b);
17521	let e = _mm256_set_ph(
17522	`0.`, `17.`, `0.`, `17.`, `0.`, `17.`, `0.`, `17.`, `0.`, `17.`, `0.`, `17.`, `0.`, `17.`, `0.`, `17.`,
17523	);
17524	assert_eq_m256h(r, e);
17525	}
17526
17527	#[simd_test(enable = "avx512fp16")]
17528	unsafe fn test_mm512_add_ph() {
17529	let a = _mm512_set_ph(
17530	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17531	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17532	`31.0`, `32.0`,
17533	);
17534	let b = _mm512_set_ph(
17535	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17536	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17537	`3.0`, `2.0`, `1.0`,
17538	);
17539	let r = _mm512_add_ph(a, b);
17540	let e = _mm512_set1_ph(`33.0`);
17541	assert_eq_m512h(r, e);
17542	}
17543
17544	#[simd_test(enable = "avx512fp16")]
17545	unsafe fn test_mm512_mask_add_ph() {
17546	let a = _mm512_set_ph(
17547	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17548	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17549	`31.0`, `32.0`,
17550	);
17551	let b = _mm512_set_ph(
17552	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17553	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17554	`3.0`, `2.0`, `1.0`,
17555	);
17556	let src = _mm512_set_ph(
17557	`34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`, `42.`, `43.`, `44.`, `45.`, `46.`, `47.`, `48.`, `49.`, `50.`,
17558	`51.`, `52.`, `53.`, `54.`, `55.`, `56.`, `57.`, `58.`, `59.`, `60.`, `61.`, `62.`, `63.`, `64.`, `65.`,
17559	);
17560	let r = _mm512_mask_add_ph(src, `0b01010101010101010101010101010101`, a, b);
17561	let e = _mm512_set_ph(
17562	`34.`, `33.`, `36.`, `33.`, `38.`, `33.`, `40.`, `33.`, `42.`, `33.`, `44.`, `33.`, `46.`, `33.`, `48.`, `33.`, `50.`,
17563	`33.`, `52.`, `33.`, `54.`, `33.`, `56.`, `33.`, `58.`, `33.`, `60.`, `33.`, `62.`, `33.`, `64.`, `33.`,
17564	);
17565	assert_eq_m512h(r, e);
17566	}
17567
17568	#[simd_test(enable = "avx512fp16")]
17569	unsafe fn test_mm512_maskz_add_ph() {
17570	let a = _mm512_set_ph(
17571	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17572	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17573	`31.0`, `32.0`,
17574	);
17575	let b = _mm512_set_ph(
17576	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17577	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17578	`3.0`, `2.0`, `1.0`,
17579	);
17580	let r = _mm512_maskz_add_ph(`0b01010101010101010101010101010101`, a, b);
17581	let e = _mm512_set_ph(
17582	`0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`,
17583	`33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`,
17584	);
17585	assert_eq_m512h(r, e);
17586	}
17587
17588	#[simd_test(enable = "avx512fp16")]
17589	unsafe fn test_mm512_add_round_ph() {
17590	let a = _mm512_set_ph(
17591	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17592	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17593	`31.0`, `32.0`,
17594	);
17595	let b = _mm512_set_ph(
17596	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17597	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17598	`3.0`, `2.0`, `1.0`,
17599	);
17600	let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
17601	let e = _mm512_set1_ph(`33.0`);
17602	assert_eq_m512h(r, e);
17603	}
17604
17605	#[simd_test(enable = "avx512fp16")]
17606	unsafe fn test_mm512_mask_add_round_ph() {
17607	let a = _mm512_set_ph(
17608	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17609	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17610	`31.0`, `32.0`,
17611	);
17612	let b = _mm512_set_ph(
17613	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17614	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17615	`3.0`, `2.0`, `1.0`,
17616	);
17617	let src = _mm512_set_ph(
17618	`34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`, `42.`, `43.`, `44.`, `45.`, `46.`, `47.`, `48.`, `49.`, `50.`,
17619	`51.`, `52.`, `53.`, `54.`, `55.`, `56.`, `57.`, `58.`, `59.`, `60.`, `61.`, `62.`, `63.`, `64.`, `65.`,
17620	);
17621	let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17622	src,
17623	`0b01010101010101010101010101010101`,
17624	a,
17625	b,
17626	);
17627	let e = _mm512_set_ph(
17628	`34.`, `33.`, `36.`, `33.`, `38.`, `33.`, `40.`, `33.`, `42.`, `33.`, `44.`, `33.`, `46.`, `33.`, `48.`, `33.`, `50.`,
17629	`33.`, `52.`, `33.`, `54.`, `33.`, `56.`, `33.`, `58.`, `33.`, `60.`, `33.`, `62.`, `33.`, `64.`, `33.`,
17630	);
17631	assert_eq_m512h(r, e);
17632	}
17633
17634	#[simd_test(enable = "avx512fp16")]
17635	unsafe fn test_mm512_maskz_add_round_ph() {
17636	let a = _mm512_set_ph(
17637	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17638	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17639	`31.0`, `32.0`,
17640	);
17641	let b = _mm512_set_ph(
17642	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17643	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17644	`3.0`, `2.0`, `1.0`,
17645	);
17646	let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17647	`0b01010101010101010101010101010101`,
17648	a,
17649	b,
17650	);
17651	let e = _mm512_set_ph(
17652	`0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`,
17653	`33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`, `0.`, `33.`,
17654	);
17655	assert_eq_m512h(r, e);
17656	}
17657
17658	#[simd_test(enable = "avx512fp16")]
17659	unsafe fn test_mm_add_round_sh() {
17660	let a = _mm_set_sh(`1.0`);
17661	let b = _mm_set_sh(`2.0`);
17662	let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
17663	let e = _mm_set_sh(`3.0`);
17664	assert_eq_m128h(r, e);
17665	}
17666
17667	#[simd_test(enable = "avx512fp16")]
17668	unsafe fn test_mm_mask_add_round_sh() {
17669	let a = _mm_set_sh(`1.0`);
17670	let b = _mm_set_sh(`2.0`);
17671	let src = _mm_set_sh(`4.0`);
17672	let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17673	src, `0`, a, b,
17674	);
17675	let e = _mm_set_sh(`4.0`);
17676	assert_eq_m128h(r, e);
17677	let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17678	src, `1`, a, b,
17679	);
17680	let e = _mm_set_sh(`3.0`);
17681	assert_eq_m128h(r, e);
17682	}
17683
17684	#[simd_test(enable = "avx512fp16")]
17685	unsafe fn test_mm_maskz_add_round_sh() {
17686	let a = _mm_set_sh(`1.0`);
17687	let b = _mm_set_sh(`2.0`);
17688	let r =
17689	_mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
17690	let e = _mm_set_sh(`0.0`);
17691	assert_eq_m128h(r, e);
17692	let r =
17693	_mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
17694	let e = _mm_set_sh(`3.0`);
17695	assert_eq_m128h(r, e);
17696	}
17697
17698	#[simd_test(enable = "avx512fp16")]
17699	unsafe fn test_mm_add_sh() {
17700	let a = _mm_set_sh(`1.0`);
17701	let b = _mm_set_sh(`2.0`);
17702	let r = _mm_add_sh(a, b);
17703	let e = _mm_set_sh(`3.0`);
17704	assert_eq_m128h(r, e);
17705	}
17706
17707	#[simd_test(enable = "avx512fp16")]
17708	unsafe fn test_mm_mask_add_sh() {
17709	let a = _mm_set_sh(`1.0`);
17710	let b = _mm_set_sh(`2.0`);
17711	let src = _mm_set_sh(`4.0`);
17712	let r = _mm_mask_add_sh(src, `0`, a, b);
17713	let e = _mm_set_sh(`4.0`);
17714	assert_eq_m128h(r, e);
17715	let r = _mm_mask_add_sh(src, `1`, a, b);
17716	let e = _mm_set_sh(`3.0`);
17717	assert_eq_m128h(r, e);
17718	}
17719
17720	#[simd_test(enable = "avx512fp16")]
17721	unsafe fn test_mm_maskz_add_sh() {
17722	let a = _mm_set_sh(`1.0`);
17723	let b = _mm_set_sh(`2.0`);
17724	let r = _mm_maskz_add_sh(`0`, a, b);
17725	let e = _mm_set_sh(`0.0`);
17726	assert_eq_m128h(r, e);
17727	let r = _mm_maskz_add_sh(`1`, a, b);
17728	let e = _mm_set_sh(`3.0`);
17729	assert_eq_m128h(r, e);
17730	}
17731
17732	#[simd_test(enable = "avx512fp16,avx512vl")]
17733	unsafe fn test_mm_sub_ph() {
17734	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17735	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
17736	let r = _mm_sub_ph(a, b);
17737	let e = _mm_set_ph(`-7.0`, `-5.0`, `-3.0`, `-1.0`, `1.0`, `3.0`, `5.0`, `7.0`);
17738	assert_eq_m128h(r, e);
17739	}
17740
17741	#[simd_test(enable = "avx512fp16,avx512vl")]
17742	unsafe fn test_mm_mask_sub_ph() {
17743	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17744	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
17745	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
17746	let r = _mm_mask_sub_ph(src, `0b01010101`, a, b);
17747	let e = _mm_set_ph(`10.`, `-5.`, `12.`, `-1.`, `14.`, `3.`, `16.`, `7.`);
17748	assert_eq_m128h(r, e);
17749	}
17750
17751	#[simd_test(enable = "avx512fp16,avx512vl")]
17752	unsafe fn test_mm_maskz_sub_ph() {
17753	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
17754	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
17755	let r = _mm_maskz_sub_ph(`0b01010101`, a, b);
17756	let e = _mm_set_ph(`0.`, `-5.`, `0.`, `-1.`, `0.`, `3.`, `0.`, `7.`);
17757	assert_eq_m128h(r, e);
17758	}
17759
17760	#[simd_test(enable = "avx512fp16,avx512vl")]
17761	unsafe fn test_mm256_sub_ph() {
17762	let a = _mm256_set_ph(
17763	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17764	);
17765	let b = _mm256_set_ph(
17766	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
17767	);
17768	let r = _mm256_sub_ph(a, b);
17769	let e = _mm256_set_ph(
17770	`-15.0`, `-13.0`, `-11.0`, `-9.0`, `-7.0`, `-5.0`, `-3.0`, `-1.0`, `1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`,
17771	`15.0`,
17772	);
17773	assert_eq_m256h(r, e);
17774	}
17775
17776	#[simd_test(enable = "avx512fp16,avx512vl")]
17777	unsafe fn test_mm256_mask_sub_ph() {
17778	let a = _mm256_set_ph(
17779	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17780	);
17781	let b = _mm256_set_ph(
17782	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
17783	);
17784	let src = _mm256_set_ph(
17785	`18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`,
17786	);
17787	let r = _mm256_mask_sub_ph(src, `0b0101010101010101`, a, b);
17788	let e = _mm256_set_ph(
17789	`18.`, `-13.`, `20.`, `-9.`, `22.`, `-5.`, `24.`, `-1.`, `26.`, `3.`, `28.`, `7.`, `30.`, `11.`, `32.`, `15.`,
17790	);
17791	assert_eq_m256h(r, e);
17792	}
17793
17794	#[simd_test(enable = "avx512fp16,avx512vl")]
17795	unsafe fn test_mm256_maskz_sub_ph() {
17796	let a = _mm256_set_ph(
17797	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17798	);
17799	let b = _mm256_set_ph(
17800	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
17801	);
17802	let r = _mm256_maskz_sub_ph(`0b0101010101010101`, a, b);
17803	let e = _mm256_set_ph(
17804	`0.`, `-13.`, `0.`, `-9.`, `0.`, `-5.`, `0.`, `-1.`, `0.`, `3.`, `0.`, `7.`, `0.`, `11.`, `0.`, `15.`,
17805	);
17806	assert_eq_m256h(r, e);
17807	}
17808
17809	#[simd_test(enable = "avx512fp16")]
17810	unsafe fn test_mm512_sub_ph() {
17811	let a = _mm512_set_ph(
17812	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17813	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17814	`31.0`, `32.0`,
17815	);
17816	let b = _mm512_set_ph(
17817	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17818	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17819	`3.0`, `2.0`, `1.0`,
17820	);
17821	let r = _mm512_sub_ph(a, b);
17822	let e = _mm512_set_ph(
17823	`-31.0`, `-29.0`, `-27.0`, `-25.0`, `-23.0`, `-21.0`, `-19.0`, `-17.0`, `-15.0`, `-13.0`, `-11.0`, `-9.0`,
17824	`-7.0`, `-5.0`, `-3.0`, `-1.0`, `1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`, `17.0`, `19.0`, `21.0`,
17825	`23.0`, `25.0`, `27.0`, `29.0`, `31.0`,
17826	);
17827	assert_eq_m512h(r, e);
17828	}
17829
17830	#[simd_test(enable = "avx512fp16")]
17831	unsafe fn test_mm512_mask_sub_ph() {
17832	let a = _mm512_set_ph(
17833	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17834	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17835	`31.0`, `32.0`,
17836	);
17837	let b = _mm512_set_ph(
17838	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17839	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17840	`3.0`, `2.0`, `1.0`,
17841	);
17842	let src = _mm512_set_ph(
17843	`34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`, `42.`, `43.`, `44.`, `45.`, `46.`, `47.`, `48.`, `49.`, `50.`,
17844	`51.`, `52.`, `53.`, `54.`, `55.`, `56.`, `57.`, `58.`, `59.`, `60.`, `61.`, `62.`, `63.`, `64.`, `65.`,
17845	);
17846	let r = _mm512_mask_sub_ph(src, `0b01010101010101010101010101010101`, a, b);
17847	let e = _mm512_set_ph(
17848	`34.`, `-29.`, `36.`, `-25.`, `38.`, `-21.`, `40.`, `-17.`, `42.`, `-13.`, `44.`, `-9.`, `46.`, `-5.`, `48.`, `-1.`,
17849	`50.`, `3.`, `52.`, `7.`, `54.`, `11.`, `56.`, `15.`, `58.`, `19.`, `60.`, `23.`, `62.`, `27.`, `64.`, `31.`,
17850	);
17851	assert_eq_m512h(r, e);
17852	}
17853
17854	#[simd_test(enable = "avx512fp16")]
17855	unsafe fn test_mm512_maskz_sub_ph() {
17856	let a = _mm512_set_ph(
17857	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17858	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17859	`31.0`, `32.0`,
17860	);
17861	let b = _mm512_set_ph(
17862	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17863	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17864	`3.0`, `2.0`, `1.0`,
17865	);
17866	let r = _mm512_maskz_sub_ph(`0b01010101010101010101010101010101`, a, b);
17867	let e = _mm512_set_ph(
17868	`0.`, `-29.`, `0.`, `-25.`, `0.`, `-21.`, `0.`, `-17.`, `0.`, `-13.`, `0.`, `-9.`, `0.`, `-5.`, `0.`, `-1.`, `0.`, `3.`,
17869	`0.`, `7.`, `0.`, `11.`, `0.`, `15.`, `0.`, `19.`, `0.`, `23.`, `0.`, `27.`, `0.`, `31.`,
17870	);
17871	assert_eq_m512h(r, e);
17872	}
17873
17874	#[simd_test(enable = "avx512fp16")]
17875	unsafe fn test_mm512_sub_round_ph() {
17876	let a = _mm512_set_ph(
17877	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17878	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17879	`31.0`, `32.0`,
17880	);
17881	let b = _mm512_set_ph(
17882	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17883	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17884	`3.0`, `2.0`, `1.0`,
17885	);
17886	let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
17887	let e = _mm512_set_ph(
17888	`-31.0`, `-29.0`, `-27.0`, `-25.0`, `-23.0`, `-21.0`, `-19.0`, `-17.0`, `-15.0`, `-13.0`, `-11.0`, `-9.0`,
17889	`-7.0`, `-5.0`, `-3.0`, `-1.0`, `1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`, `17.0`, `19.0`, `21.0`,
17890	`23.0`, `25.0`, `27.0`, `29.0`, `31.0`,
17891	);
17892	assert_eq_m512h(r, e);
17893	}
17894
17895	#[simd_test(enable = "avx512fp16")]
17896	unsafe fn test_mm512_mask_sub_round_ph() {
17897	let a = _mm512_set_ph(
17898	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17899	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17900	`31.0`, `32.0`,
17901	);
17902	let b = _mm512_set_ph(
17903	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17904	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17905	`3.0`, `2.0`, `1.0`,
17906	);
17907	let src = _mm512_set_ph(
17908	`34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`, `42.`, `43.`, `44.`, `45.`, `46.`, `47.`, `48.`, `49.`, `50.`,
17909	`51.`, `52.`, `53.`, `54.`, `55.`, `56.`, `57.`, `58.`, `59.`, `60.`, `61.`, `62.`, `63.`, `64.`, `65.`,
17910	);
17911	let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17912	src,
17913	`0b01010101010101010101010101010101`,
17914	a,
17915	b,
17916	);
17917	let e = _mm512_set_ph(
17918	`34.`, `-29.`, `36.`, `-25.`, `38.`, `-21.`, `40.`, `-17.`, `42.`, `-13.`, `44.`, `-9.`, `46.`, `-5.`, `48.`, `-1.`,
17919	`50.`, `3.`, `52.`, `7.`, `54.`, `11.`, `56.`, `15.`, `58.`, `19.`, `60.`, `23.`, `62.`, `27.`, `64.`, `31.`,
17920	);
17921	assert_eq_m512h(r, e);
17922	}
17923
17924	#[simd_test(enable = "avx512fp16")]
17925	unsafe fn test_mm512_maskz_sub_round_ph() {
17926	let a = _mm512_set_ph(
17927	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
17928	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
17929	`31.0`, `32.0`,
17930	);
17931	let b = _mm512_set_ph(
17932	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
17933	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
17934	`3.0`, `2.0`, `1.0`,
17935	);
17936	let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17937	`0b01010101010101010101010101010101`,
17938	a,
17939	b,
17940	);
17941	let e = _mm512_set_ph(
17942	`0.`, `-29.`, `0.`, `-25.`, `0.`, `-21.`, `0.`, `-17.`, `0.`, `-13.`, `0.`, `-9.`, `0.`, `-5.`, `0.`, `-1.`, `0.`, `3.`,
17943	`0.`, `7.`, `0.`, `11.`, `0.`, `15.`, `0.`, `19.`, `0.`, `23.`, `0.`, `27.`, `0.`, `31.`,
17944	);
17945	assert_eq_m512h(r, e);
17946	}
17947
17948	#[simd_test(enable = "avx512fp16")]
17949	unsafe fn test_mm_sub_round_sh() {
17950	let a = _mm_set_sh(`1.0`);
17951	let b = _mm_set_sh(`2.0`);
17952	let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
17953	let e = _mm_set_sh(`-1.0`);
17954	assert_eq_m128h(r, e);
17955	}
17956
17957	#[simd_test(enable = "avx512fp16")]
17958	unsafe fn test_mm_mask_sub_round_sh() {
17959	let a = _mm_set_sh(`1.0`);
17960	let b = _mm_set_sh(`2.0`);
17961	let src = _mm_set_sh(`4.0`);
17962	let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17963	src, `0`, a, b,
17964	);
17965	let e = _mm_set_sh(`4.0`);
17966	assert_eq_m128h(r, e);
17967	let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
17968	src, `1`, a, b,
17969	);
17970	let e = _mm_set_sh(`-1.0`);
17971	assert_eq_m128h(r, e);
17972	}
17973
17974	#[simd_test(enable = "avx512fp16")]
17975	unsafe fn test_mm_maskz_sub_round_sh() {
17976	let a = _mm_set_sh(`1.0`);
17977	let b = _mm_set_sh(`2.0`);
17978	let r =
17979	_mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
17980	let e = _mm_set_sh(`0.0`);
17981	assert_eq_m128h(r, e);
17982	let r =
17983	_mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
17984	let e = _mm_set_sh(`-1.0`);
17985	assert_eq_m128h(r, e);
17986	}
17987
17988	#[simd_test(enable = "avx512fp16")]
17989	unsafe fn test_mm_sub_sh() {
17990	let a = _mm_set_sh(`1.0`);
17991	let b = _mm_set_sh(`2.0`);
17992	let r = _mm_sub_sh(a, b);
17993	let e = _mm_set_sh(`-1.0`);
17994	assert_eq_m128h(r, e);
17995	}
17996
17997	#[simd_test(enable = "avx512fp16")]
17998	unsafe fn test_mm_mask_sub_sh() {
17999	let a = _mm_set_sh(`1.0`);
18000	let b = _mm_set_sh(`2.0`);
18001	let src = _mm_set_sh(`4.0`);
18002	let r = _mm_mask_sub_sh(src, `0`, a, b);
18003	let e = _mm_set_sh(`4.0`);
18004	assert_eq_m128h(r, e);
18005	let r = _mm_mask_sub_sh(src, `1`, a, b);
18006	let e = _mm_set_sh(`-1.0`);
18007	assert_eq_m128h(r, e);
18008	}
18009
18010	#[simd_test(enable = "avx512fp16")]
18011	unsafe fn test_mm_maskz_sub_sh() {
18012	let a = _mm_set_sh(`1.0`);
18013	let b = _mm_set_sh(`2.0`);
18014	let r = _mm_maskz_sub_sh(`0`, a, b);
18015	let e = _mm_set_sh(`0.0`);
18016	assert_eq_m128h(r, e);
18017	let r = _mm_maskz_sub_sh(`1`, a, b);
18018	let e = _mm_set_sh(`-1.0`);
18019	assert_eq_m128h(r, e);
18020	}
18021
18022	#[simd_test(enable = "avx512fp16,avx512vl")]
18023	unsafe fn test_mm_mul_ph() {
18024	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
18025	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
18026	let r = _mm_mul_ph(a, b);
18027	let e = _mm_set_ph(`8.0`, `14.0`, `18.0`, `20.0`, `20.0`, `18.0`, `14.0`, `8.0`);
18028	assert_eq_m128h(r, e);
18029	}
18030
18031	#[simd_test(enable = "avx512fp16,avx512vl")]
18032	unsafe fn test_mm_mask_mul_ph() {
18033	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
18034	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
18035	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
18036	let r = _mm_mask_mul_ph(src, `0b01010101`, a, b);
18037	let e = _mm_set_ph(`10.`, `14.`, `12.`, `20.`, `14.`, `18.`, `16.`, `8.`);
18038	assert_eq_m128h(r, e);
18039	}
18040
18041	#[simd_test(enable = "avx512fp16,avx512vl")]
18042	unsafe fn test_mm_maskz_mul_ph() {
18043	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
18044	let b = _mm_set_ph(`8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`);
18045	let r = _mm_maskz_mul_ph(`0b01010101`, a, b);
18046	let e = _mm_set_ph(`0.`, `14.`, `0.`, `20.`, `0.`, `18.`, `0.`, `8.`);
18047	assert_eq_m128h(r, e);
18048	}
18049
18050	#[simd_test(enable = "avx512fp16,avx512vl")]
18051	unsafe fn test_mm256_mul_ph() {
18052	let a = _mm256_set_ph(
18053	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18054	);
18055	let b = _mm256_set_ph(
18056	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
18057	);
18058	let r = _mm256_mul_ph(a, b);
18059	let e = _mm256_set_ph(
18060	`16.0`, `30.0`, `42.0`, `52.0`, `60.0`, `66.0`, `70.0`, `72.0`, `72.0`, `70.0`, `66.0`, `60.0`, `52.0`, `42.0`,
18061	`30.0`, `16.0`,
18062	);
18063	assert_eq_m256h(r, e);
18064	}
18065
18066	#[simd_test(enable = "avx512fp16,avx512vl")]
18067	unsafe fn test_mm256_mask_mul_ph() {
18068	let a = _mm256_set_ph(
18069	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18070	);
18071	let b = _mm256_set_ph(
18072	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
18073	);
18074	let src = _mm256_set_ph(
18075	`18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`,
18076	);
18077	let r = _mm256_mask_mul_ph(src, `0b0101010101010101`, a, b);
18078	let e = _mm256_set_ph(
18079	`18.`, `30.`, `20.`, `52.`, `22.`, `66.`, `24.`, `72.`, `26.`, `70.`, `28.`, `60.`, `30.`, `42.`, `32.`, `16.`,
18080	);
18081	assert_eq_m256h(r, e);
18082	}
18083
18084	#[simd_test(enable = "avx512fp16,avx512vl")]
18085	unsafe fn test_mm256_maskz_mul_ph() {
18086	let a = _mm256_set_ph(
18087	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18088	);
18089	let b = _mm256_set_ph(
18090	`16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`, `3.0`, `2.0`, `1.0`,
18091	);
18092	let r = _mm256_maskz_mul_ph(`0b0101010101010101`, a, b);
18093	let e = _mm256_set_ph(
18094	`0.`, `30.`, `0.`, `52.`, `0.`, `66.`, `0.`, `72.`, `0.`, `70.`, `0.`, `60.`, `0.`, `42.`, `0.`, `16.`,
18095	);
18096	assert_eq_m256h(r, e);
18097	}
18098
18099	#[simd_test(enable = "avx512fp16")]
18100	unsafe fn test_mm512_mul_ph() {
18101	let a = _mm512_set_ph(
18102	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18103	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
18104	`31.0`, `32.0`,
18105	);
18106	let b = _mm512_set_ph(
18107	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
18108	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
18109	`3.0`, `2.0`, `1.0`,
18110	);
18111	let r = _mm512_mul_ph(a, b);
18112	let e = _mm512_set_ph(
18113	`32.0`, `62.0`, `90.0`, `116.0`, `140.0`, `162.0`, `182.0`, `200.0`, `216.0`, `230.0`, `242.0`, `252.0`, `260.0`,
18114	`266.0`, `270.0`, `272.0`, `272.0`, `270.0`, `266.0`, `260.0`, `252.0`, `242.0`, `230.0`, `216.0`, `200.0`,
18115	`182.0`, `162.0`, `140.0`, `116.0`, `90.0`, `62.0`, `32.0`,
18116	);
18117	assert_eq_m512h(r, e);
18118	}
18119
18120	#[simd_test(enable = "avx512fp16")]
18121	unsafe fn test_mm512_mask_mul_ph() {
18122	let a = _mm512_set_ph(
18123	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18124	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
18125	`31.0`, `32.0`,
18126	);
18127	let b = _mm512_set_ph(
18128	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
18129	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
18130	`3.0`, `2.0`, `1.0`,
18131	);
18132	let src = _mm512_set_ph(
18133	`34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`, `42.`, `43.`, `44.`, `45.`, `46.`, `47.`, `48.`, `49.`, `50.`,
18134	`51.`, `52.`, `53.`, `54.`, `55.`, `56.`, `57.`, `58.`, `59.`, `60.`, `61.`, `62.`, `63.`, `64.`, `65.`,
18135	);
18136	let r = _mm512_mask_mul_ph(src, `0b01010101010101010101010101010101`, a, b);
18137	let e = _mm512_set_ph(
18138	`34.`, `62.`, `36.`, `116.`, `38.`, `162.`, `40.`, `200.`, `42.`, `230.`, `44.`, `252.`, `46.`, `266.`, `48.`, `272.`,
18139	`50.`, `270.`, `52.`, `260.`, `54.`, `242.`, `56.`, `216.`, `58.`, `182.`, `60.`, `140.`, `62.`, `90.`, `64.`, `32.`,
18140	);
18141	assert_eq_m512h(r, e);
18142	}
18143
18144	#[simd_test(enable = "avx512fp16")]
18145	unsafe fn test_mm512_maskz_mul_ph() {
18146	let a = _mm512_set_ph(
18147	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18148	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
18149	`31.0`, `32.0`,
18150	);
18151	let b = _mm512_set_ph(
18152	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
18153	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
18154	`3.0`, `2.0`, `1.0`,
18155	);
18156	let r = _mm512_maskz_mul_ph(`0b01010101010101010101010101010101`, a, b);
18157	let e = _mm512_set_ph(
18158	`0.`, `62.`, `0.`, `116.`, `0.`, `162.`, `0.`, `200.`, `0.`, `230.`, `0.`, `252.`, `0.`, `266.`, `0.`, `272.`, `0.`,
18159	`270.`, `0.`, `260.`, `0.`, `242.`, `0.`, `216.`, `0.`, `182.`, `0.`, `140.`, `0.`, `90.`, `0.`, `32.`,
18160	);
18161	assert_eq_m512h(r, e);
18162	}
18163
18164	#[simd_test(enable = "avx512fp16")]
18165	unsafe fn test_mm512_mul_round_ph() {
18166	let a = _mm512_set_ph(
18167	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18168	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
18169	`31.0`, `32.0`,
18170	);
18171	let b = _mm512_set_ph(
18172	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
18173	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
18174	`3.0`, `2.0`, `1.0`,
18175	);
18176	let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18177	let e = _mm512_set_ph(
18178	`32.0`, `62.0`, `90.0`, `116.0`, `140.0`, `162.0`, `182.0`, `200.0`, `216.0`, `230.0`, `242.0`, `252.0`, `260.0`,
18179	`266.0`, `270.0`, `272.0`, `272.0`, `270.0`, `266.0`, `260.0`, `252.0`, `242.0`, `230.0`, `216.0`, `200.0`,
18180	`182.0`, `162.0`, `140.0`, `116.0`, `90.0`, `62.0`, `32.0`,
18181	);
18182	assert_eq_m512h(r, e);
18183	}
18184
18185	#[simd_test(enable = "avx512fp16")]
18186	unsafe fn test_mm512_mask_mul_round_ph() {
18187	let a = _mm512_set_ph(
18188	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18189	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
18190	`31.0`, `32.0`,
18191	);
18192	let b = _mm512_set_ph(
18193	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
18194	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
18195	`3.0`, `2.0`, `1.0`,
18196	);
18197	let src = _mm512_set_ph(
18198	`34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`, `42.`, `43.`, `44.`, `45.`, `46.`, `47.`, `48.`, `49.`, `50.`,
18199	`51.`, `52.`, `53.`, `54.`, `55.`, `56.`, `57.`, `58.`, `59.`, `60.`, `61.`, `62.`, `63.`, `64.`, `65.`,
18200	);
18201	let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18202	src,
18203	`0b01010101010101010101010101010101`,
18204	a,
18205	b,
18206	);
18207	let e = _mm512_set_ph(
18208	`34.`, `62.`, `36.`, `116.`, `38.`, `162.`, `40.`, `200.`, `42.`, `230.`, `44.`, `252.`, `46.`, `266.`, `48.`, `272.`,
18209	`50.`, `270.`, `52.`, `260.`, `54.`, `242.`, `56.`, `216.`, `58.`, `182.`, `60.`, `140.`, `62.`, `90.`, `64.`, `32.`,
18210	);
18211	assert_eq_m512h(r, e);
18212	}
18213
18214	#[simd_test(enable = "avx512fp16")]
18215	unsafe fn test_mm512_maskz_mul_round_ph() {
18216	let a = _mm512_set_ph(
18217	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
18218	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
18219	`31.0`, `32.0`,
18220	);
18221	let b = _mm512_set_ph(
18222	`32.0`, `31.0`, `30.0`, `29.0`, `28.0`, `27.0`, `26.0`, `25.0`, `24.0`, `23.0`, `22.0`, `21.0`, `20.0`, `19.0`,
18223	`18.0`, `17.0`, `16.0`, `15.0`, `14.0`, `13.0`, `12.0`, `11.0`, `10.0`, `9.0`, `8.0`, `7.0`, `6.0`, `5.0`, `4.0`,
18224	`3.0`, `2.0`, `1.0`,
18225	);
18226	let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18227	`0b01010101010101010101010101010101`,
18228	a,
18229	b,
18230	);
18231	let e = _mm512_set_ph(
18232	`0.`, `62.`, `0.`, `116.`, `0.`, `162.`, `0.`, `200.`, `0.`, `230.`, `0.`, `252.`, `0.`, `266.`, `0.`, `272.`, `0.`,
18233	`270.`, `0.`, `260.`, `0.`, `242.`, `0.`, `216.`, `0.`, `182.`, `0.`, `140.`, `0.`, `90.`, `0.`, `32.`,
18234	);
18235	assert_eq_m512h(r, e);
18236	}
18237
18238	#[simd_test(enable = "avx512fp16")]
18239	unsafe fn test_mm_mul_round_sh() {
18240	let a = _mm_set_sh(`1.0`);
18241	let b = _mm_set_sh(`2.0`);
18242	let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18243	let e = _mm_set_sh(`2.0`);
18244	assert_eq_m128h(r, e);
18245	}
18246
18247	#[simd_test(enable = "avx512fp16")]
18248	unsafe fn test_mm_mask_mul_round_sh() {
18249	let a = _mm_set_sh(`1.0`);
18250	let b = _mm_set_sh(`2.0`);
18251	let src = _mm_set_sh(`4.0`);
18252	let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18253	src, `0`, a, b,
18254	);
18255	let e = _mm_set_sh(`4.0`);
18256	assert_eq_m128h(r, e);
18257	let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18258	src, `1`, a, b,
18259	);
18260	let e = _mm_set_sh(`2.0`);
18261	assert_eq_m128h(r, e);
18262	}
18263
18264	#[simd_test(enable = "avx512fp16")]
18265	unsafe fn test_mm_maskz_mul_round_sh() {
18266	let a = _mm_set_sh(`1.0`);
18267	let b = _mm_set_sh(`2.0`);
18268	let r =
18269	_mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
18270	let e = _mm_set_sh(`0.0`);
18271	assert_eq_m128h(r, e);
18272	let r =
18273	_mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
18274	let e = _mm_set_sh(`2.0`);
18275	assert_eq_m128h(r, e);
18276	}
18277
18278	#[simd_test(enable = "avx512fp16")]
18279	unsafe fn test_mm_mul_sh() {
18280	let a = _mm_set_sh(`1.0`);
18281	let b = _mm_set_sh(`2.0`);
18282	let r = _mm_mul_sh(a, b);
18283	let e = _mm_set_sh(`2.0`);
18284	assert_eq_m128h(r, e);
18285	}
18286
18287	#[simd_test(enable = "avx512fp16")]
18288	unsafe fn test_mm_mask_mul_sh() {
18289	let a = _mm_set_sh(`1.0`);
18290	let b = _mm_set_sh(`2.0`);
18291	let src = _mm_set_sh(`4.0`);
18292	let r = _mm_mask_mul_sh(src, `0`, a, b);
18293	let e = _mm_set_sh(`4.0`);
18294	assert_eq_m128h(r, e);
18295	let r = _mm_mask_mul_sh(src, `1`, a, b);
18296	let e = _mm_set_sh(`2.0`);
18297	assert_eq_m128h(r, e);
18298	}
18299
18300	#[simd_test(enable = "avx512fp16")]
18301	unsafe fn test_mm_maskz_mul_sh() {
18302	let a = _mm_set_sh(`1.0`);
18303	let b = _mm_set_sh(`2.0`);
18304	let r = _mm_maskz_mul_sh(`0`, a, b);
18305	let e = _mm_set_sh(`0.0`);
18306	assert_eq_m128h(r, e);
18307	let r = _mm_maskz_mul_sh(`1`, a, b);
18308	let e = _mm_set_sh(`2.0`);
18309	assert_eq_m128h(r, e);
18310	}
18311
18312	#[simd_test(enable = "avx512fp16,avx512vl")]
18313	unsafe fn test_mm_div_ph() {
18314	let a = _mm_set1_ph(`1.0`);
18315	let b = _mm_set1_ph(`2.0`);
18316	let r = _mm_div_ph(a, b);
18317	let e = _mm_set1_ph(`0.5`);
18318	assert_eq_m128h(r, e);
18319	}
18320
18321	#[simd_test(enable = "avx512fp16,avx512vl")]
18322	unsafe fn test_mm_mask_div_ph() {
18323	let a = _mm_set1_ph(`1.0`);
18324	let b = _mm_set1_ph(`2.0`);
18325	let src = _mm_set_ph(`4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`);
18326	let r = _mm_mask_div_ph(src, `0b01010101`, a, b);
18327	let e = _mm_set_ph(`4.0`, `0.5`, `6.0`, `0.5`, `8.0`, `0.5`, `10.0`, `0.5`);
18328	assert_eq_m128h(r, e);
18329	}
18330
18331	#[simd_test(enable = "avx512fp16,avx512vl")]
18332	unsafe fn test_mm_maskz_div_ph() {
18333	let a = _mm_set1_ph(`1.0`);
18334	let b = _mm_set1_ph(`2.0`);
18335	let r = _mm_maskz_div_ph(`0b01010101`, a, b);
18336	let e = _mm_set_ph(`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`);
18337	assert_eq_m128h(r, e);
18338	}
18339
18340	#[simd_test(enable = "avx512fp16,avx512vl")]
18341	unsafe fn test_mm256_div_ph() {
18342	let a = _mm256_set1_ph(`1.0`);
18343	let b = _mm256_set1_ph(`2.0`);
18344	let r = _mm256_div_ph(a, b);
18345	let e = _mm256_set1_ph(`0.5`);
18346	assert_eq_m256h(r, e);
18347	}
18348
18349	#[simd_test(enable = "avx512fp16,avx512vl")]
18350	unsafe fn test_mm256_mask_div_ph() {
18351	let a = _mm256_set1_ph(`1.0`);
18352	let b = _mm256_set1_ph(`2.0`);
18353	let src = _mm256_set_ph(
18354	`4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`,
18355	`19.0`,
18356	);
18357	let r = _mm256_mask_div_ph(src, `0b0101010101010101`, a, b);
18358	let e = _mm256_set_ph(
18359	`4.0`, `0.5`, `6.0`, `0.5`, `8.0`, `0.5`, `10.0`, `0.5`, `12.0`, `0.5`, `14.0`, `0.5`, `16.0`, `0.5`, `18.0`, `0.5`,
18360	);
18361	assert_eq_m256h(r, e);
18362	}
18363
18364	#[simd_test(enable = "avx512fp16,avx512vl")]
18365	unsafe fn test_mm256_maskz_div_ph() {
18366	let a = _mm256_set1_ph(`1.0`);
18367	let b = _mm256_set1_ph(`2.0`);
18368	let r = _mm256_maskz_div_ph(`0b0101010101010101`, a, b);
18369	let e = _mm256_set_ph(
18370	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
18371	);
18372	assert_eq_m256h(r, e);
18373	}
18374
18375	#[simd_test(enable = "avx512fp16")]
18376	unsafe fn test_mm512_div_ph() {
18377	let a = _mm512_set1_ph(`1.0`);
18378	let b = _mm512_set1_ph(`2.0`);
18379	let r = _mm512_div_ph(a, b);
18380	let e = _mm512_set1_ph(`0.5`);
18381	assert_eq_m512h(r, e);
18382	}
18383
18384	#[simd_test(enable = "avx512fp16")]
18385	unsafe fn test_mm512_mask_div_ph() {
18386	let a = _mm512_set1_ph(`1.0`);
18387	let b = _mm512_set1_ph(`2.0`);
18388	let src = _mm512_set_ph(
18389	`4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`,
18390	`19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`, `32.0`,
18391	`33.0`, `34.0`, `35.0`,
18392	);
18393	let r = _mm512_mask_div_ph(src, `0b01010101010101010101010101010101`, a, b);
18394	let e = _mm512_set_ph(
18395	`4.0`, `0.5`, `6.0`, `0.5`, `8.0`, `0.5`, `10.0`, `0.5`, `12.0`, `0.5`, `14.0`, `0.5`, `16.0`, `0.5`, `18.0`, `0.5`,
18396	`20.0`, `0.5`, `22.0`, `0.5`, `24.0`, `0.5`, `26.0`, `0.5`, `28.0`, `0.5`, `30.0`, `0.5`, `32.0`, `0.5`, `34.0`, `0.5`,
18397	);
18398	assert_eq_m512h(r, e);
18399	}
18400
18401	#[simd_test(enable = "avx512fp16")]
18402	unsafe fn test_mm512_maskz_div_ph() {
18403	let a = _mm512_set1_ph(`1.0`);
18404	let b = _mm512_set1_ph(`2.0`);
18405	let r = _mm512_maskz_div_ph(`0b01010101010101010101010101010101`, a, b);
18406	let e = _mm512_set_ph(
18407	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`,
18408	`0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
18409	);
18410	assert_eq_m512h(r, e);
18411	}
18412
18413	#[simd_test(enable = "avx512fp16")]
18414	unsafe fn test_mm512_div_round_ph() {
18415	let a = _mm512_set1_ph(`1.0`);
18416	let b = _mm512_set1_ph(`2.0`);
18417	let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18418	let e = _mm512_set1_ph(`0.5`);
18419	assert_eq_m512h(r, e);
18420	}
18421
18422	#[simd_test(enable = "avx512fp16")]
18423	unsafe fn test_mm512_mask_div_round_ph() {
18424	let a = _mm512_set1_ph(`1.0`);
18425	let b = _mm512_set1_ph(`2.0`);
18426	let src = _mm512_set_ph(
18427	`4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`,
18428	`19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`, `32.0`,
18429	`33.0`, `34.0`, `35.0`,
18430	);
18431	let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18432	src,
18433	`0b01010101010101010101010101010101`,
18434	a,
18435	b,
18436	);
18437	let e = _mm512_set_ph(
18438	`4.0`, `0.5`, `6.0`, `0.5`, `8.0`, `0.5`, `10.0`, `0.5`, `12.0`, `0.5`, `14.0`, `0.5`, `16.0`, `0.5`, `18.0`, `0.5`,
18439	`20.0`, `0.5`, `22.0`, `0.5`, `24.0`, `0.5`, `26.0`, `0.5`, `28.0`, `0.5`, `30.0`, `0.5`, `32.0`, `0.5`, `34.0`, `0.5`,
18440	);
18441	assert_eq_m512h(r, e);
18442	}
18443
18444	#[simd_test(enable = "avx512fp16")]
18445	unsafe fn test_mm512_maskz_div_round_ph() {
18446	let a = _mm512_set1_ph(`1.0`);
18447	let b = _mm512_set1_ph(`2.0`);
18448	let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18449	`0b01010101010101010101010101010101`,
18450	a,
18451	b,
18452	);
18453	let e = _mm512_set_ph(
18454	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`,
18455	`0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
18456	);
18457	assert_eq_m512h(r, e);
18458	}
18459
18460	#[simd_test(enable = "avx512fp16")]
18461	unsafe fn test_mm_div_round_sh() {
18462	let a = _mm_set_sh(`1.0`);
18463	let b = _mm_set_sh(`2.0`);
18464	let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18465	let e = _mm_set_sh(`0.5`);
18466	assert_eq_m128h(r, e);
18467	}
18468
18469	#[simd_test(enable = "avx512fp16")]
18470	unsafe fn test_mm_mask_div_round_sh() {
18471	let a = _mm_set_sh(`1.0`);
18472	let b = _mm_set_sh(`2.0`);
18473	let src = _mm_set_sh(`4.0`);
18474	let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18475	src, `0`, a, b,
18476	);
18477	let e = _mm_set_sh(`4.0`);
18478	assert_eq_m128h(r, e);
18479	let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18480	src, `1`, a, b,
18481	);
18482	let e = _mm_set_sh(`0.5`);
18483	assert_eq_m128h(r, e);
18484	}
18485
18486	#[simd_test(enable = "avx512fp16")]
18487	unsafe fn test_mm_maskz_div_round_sh() {
18488	let a = _mm_set_sh(`1.0`);
18489	let b = _mm_set_sh(`2.0`);
18490	let r =
18491	_mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
18492	let e = _mm_set_sh(`0.0`);
18493	assert_eq_m128h(r, e);
18494	let r =
18495	_mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
18496	let e = _mm_set_sh(`0.5`);
18497	assert_eq_m128h(r, e);
18498	}
18499
18500	#[simd_test(enable = "avx512fp16")]
18501	unsafe fn test_mm_div_sh() {
18502	let a = _mm_set_sh(`1.0`);
18503	let b = _mm_set_sh(`2.0`);
18504	let r = _mm_div_sh(a, b);
18505	let e = _mm_set_sh(`0.5`);
18506	assert_eq_m128h(r, e);
18507	}
18508
18509	#[simd_test(enable = "avx512fp16")]
18510	unsafe fn test_mm_mask_div_sh() {
18511	let a = _mm_set_sh(`1.0`);
18512	let b = _mm_set_sh(`2.0`);
18513	let src = _mm_set_sh(`4.0`);
18514	let r = _mm_mask_div_sh(src, `0`, a, b);
18515	let e = _mm_set_sh(`4.0`);
18516	assert_eq_m128h(r, e);
18517	let r = _mm_mask_div_sh(src, `1`, a, b);
18518	let e = _mm_set_sh(`0.5`);
18519	assert_eq_m128h(r, e);
18520	}
18521
18522	#[simd_test(enable = "avx512fp16")]
18523	unsafe fn test_mm_maskz_div_sh() {
18524	let a = _mm_set_sh(`1.0`);
18525	let b = _mm_set_sh(`2.0`);
18526	let r = _mm_maskz_div_sh(`0`, a, b);
18527	let e = _mm_set_sh(`0.0`);
18528	assert_eq_m128h(r, e);
18529	let r = _mm_maskz_div_sh(`1`, a, b);
18530	let e = _mm_set_sh(`0.5`);
18531	assert_eq_m128h(r, e);
18532	}
18533
18534	#[simd_test(enable = "avx512fp16,avx512vl")]
18535	unsafe fn test_mm_mul_pch() {
18536	let a = _mm_set1_pch(`0.0`, `1.0`);
18537	let b = _mm_set1_pch(`0.0`, `1.0`);
18538	let r = _mm_mul_pch(a, b);
18539	let e = _mm_set1_pch(`-1.0`, `0.0`);
18540	assert_eq_m128h(r, e);
18541	}
18542
18543	#[simd_test(enable = "avx512fp16,avx512vl")]
18544	unsafe fn test_mm_mask_mul_pch() {
18545	let a = _mm_set1_pch(`0.0`, `1.0`);
18546	let b = _mm_set1_pch(`0.0`, `1.0`);
18547	let src = _mm_setr_ph(`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`);
18548	let r = _mm_mask_mul_pch(src, `0b0101`, a, b);
18549	let e = _mm_setr_ph(`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`);
18550	assert_eq_m128h(r, e);
18551	}
18552
18553	#[simd_test(enable = "avx512fp16,avx512vl")]
18554	unsafe fn test_mm_maskz_mul_pch() {
18555	let a = _mm_set1_pch(`0.0`, `1.0`);
18556	let b = _mm_set1_pch(`0.0`, `1.0`);
18557	let r = _mm_maskz_mul_pch(`0b0101`, a, b);
18558	let e = _mm_setr_ph(`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`);
18559	assert_eq_m128h(r, e);
18560	}
18561
18562	#[simd_test(enable = "avx512fp16,avx512vl")]
18563	unsafe fn test_mm256_mul_pch() {
18564	let a = _mm256_set1_pch(`0.0`, `1.0`);
18565	let b = _mm256_set1_pch(`0.0`, `1.0`);
18566	let r = _mm256_mul_pch(a, b);
18567	let e = _mm256_set1_pch(`-1.0`, `0.0`);
18568	assert_eq_m256h(r, e);
18569	}
18570
18571	#[simd_test(enable = "avx512fp16,avx512vl")]
18572	unsafe fn test_mm256_mask_mul_pch() {
18573	let a = _mm256_set1_pch(`0.0`, `1.0`);
18574	let b = _mm256_set1_pch(`0.0`, `1.0`);
18575	let src = _mm256_setr_ph(
18576	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18577	);
18578	let r = _mm256_mask_mul_pch(src, `0b01010101`, a, b);
18579	let e = _mm256_setr_ph(
18580	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18581	);
18582	assert_eq_m256h(r, e);
18583	}
18584
18585	#[simd_test(enable = "avx512fp16,avx512vl")]
18586	unsafe fn test_mm256_maskz_mul_pch() {
18587	let a = _mm256_set1_pch(`0.0`, `1.0`);
18588	let b = _mm256_set1_pch(`0.0`, `1.0`);
18589	let r = _mm256_maskz_mul_pch(`0b01010101`, a, b);
18590	let e = _mm256_setr_ph(
18591	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18592	);
18593	assert_eq_m256h(r, e);
18594	}
18595
18596	#[simd_test(enable = "avx512fp16")]
18597	unsafe fn test_mm512_mul_pch() {
18598	let a = _mm512_set1_pch(`0.0`, `1.0`);
18599	let b = _mm512_set1_pch(`0.0`, `1.0`);
18600	let r = _mm512_mul_pch(a, b);
18601	let e = _mm512_set1_pch(`-1.0`, `0.0`);
18602	assert_eq_m512h(r, e);
18603	}
18604
18605	#[simd_test(enable = "avx512fp16")]
18606	unsafe fn test_mm512_mask_mul_pch() {
18607	let a = _mm512_set1_pch(`0.0`, `1.0`);
18608	let b = _mm512_set1_pch(`0.0`, `1.0`);
18609	let src = _mm512_setr_ph(
18610	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18611	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
18612	`32.0`, `33.0`,
18613	);
18614	let r = _mm512_mask_mul_pch(src, `0b0101010101010101`, a, b);
18615	let e = _mm512_setr_ph(
18616	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18617	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
18618	`33.0`,
18619	);
18620	assert_eq_m512h(r, e);
18621	}
18622
18623	#[simd_test(enable = "avx512fp16")]
18624	unsafe fn test_mm512_maskz_mul_pch() {
18625	let a = _mm512_set1_pch(`0.0`, `1.0`);
18626	let b = _mm512_set1_pch(`0.0`, `1.0`);
18627	let r = _mm512_maskz_mul_pch(`0b0101010101010101`, a, b);
18628	let e = _mm512_setr_ph(
18629	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18630	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18631	);
18632	assert_eq_m512h(r, e);
18633	}
18634
18635	#[simd_test(enable = "avx512fp16")]
18636	unsafe fn test_mm512_mul_round_pch() {
18637	let a = _mm512_set1_pch(`0.0`, `1.0`);
18638	let b = _mm512_set1_pch(`0.0`, `1.0`);
18639	let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18640	let e = _mm512_set1_pch(`-1.0`, `0.0`);
18641	assert_eq_m512h(r, e);
18642	}
18643
18644	#[simd_test(enable = "avx512fp16")]
18645	unsafe fn test_mm512_mask_mul_round_pch() {
18646	let a = _mm512_set1_pch(`0.0`, `1.0`);
18647	let b = _mm512_set1_pch(`0.0`, `1.0`);
18648	let src = _mm512_setr_ph(
18649	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18650	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
18651	`32.0`, `33.0`,
18652	);
18653	let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18654	src,
18655	`0b0101010101010101`,
18656	a,
18657	b,
18658	);
18659	let e = _mm512_setr_ph(
18660	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18661	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
18662	`33.0`,
18663	);
18664	assert_eq_m512h(r, e);
18665	}
18666
18667	#[simd_test(enable = "avx512fp16")]
18668	unsafe fn test_mm512_maskz_mul_round_pch() {
18669	let a = _mm512_set1_pch(`0.0`, `1.0`);
18670	let b = _mm512_set1_pch(`0.0`, `1.0`);
18671	let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18672	`0b0101010101010101`,
18673	a,
18674	b,
18675	);
18676	let e = _mm512_setr_ph(
18677	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18678	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18679	);
18680	assert_eq_m512h(r, e);
18681	}
18682
18683	#[simd_test(enable = "avx512fp16")]
18684	unsafe fn test_mm_mul_round_sch() {
18685	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18686	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18687	let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18688	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18689	assert_eq_m128h(r, e);
18690	}
18691
18692	#[simd_test(enable = "avx512fp16")]
18693	unsafe fn test_mm_mask_mul_round_sch() {
18694	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18695	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18696	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
18697	let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18698	src, `0`, a, b,
18699	);
18700	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18701	assert_eq_m128h(r, e);
18702	}
18703
18704	#[simd_test(enable = "avx512fp16")]
18705	unsafe fn test_mm_maskz_mul_round_sch() {
18706	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18707	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18708	let r =
18709	_mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
18710	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18711	assert_eq_m128h(r, e);
18712	}
18713
18714	#[simd_test(enable = "avx512fp16")]
18715	unsafe fn test_mm_mul_sch() {
18716	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18717	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18718	let r = _mm_mul_sch(a, b);
18719	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18720	assert_eq_m128h(r, e);
18721	}
18722
18723	#[simd_test(enable = "avx512fp16")]
18724	unsafe fn test_mm_mask_mul_sch() {
18725	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18726	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18727	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
18728	let r = _mm_mask_mul_sch(src, `0`, a, b);
18729	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18730	assert_eq_m128h(r, e);
18731	}
18732
18733	#[simd_test(enable = "avx512fp16")]
18734	unsafe fn test_mm_maskz_mul_sch() {
18735	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18736	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18737	let r = _mm_maskz_mul_sch(`0`, a, b);
18738	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18739	assert_eq_m128h(r, e);
18740	}
18741
18742	#[simd_test(enable = "avx512fp16,avx512vl")]
18743	unsafe fn test_mm_fmul_pch() {
18744	let a = _mm_set1_pch(`0.0`, `1.0`);
18745	let b = _mm_set1_pch(`0.0`, `1.0`);
18746	let r = _mm_fmul_pch(a, b);
18747	let e = _mm_set1_pch(`-1.0`, `0.0`);
18748	assert_eq_m128h(r, e);
18749	}
18750
18751	#[simd_test(enable = "avx512fp16,avx512vl")]
18752	unsafe fn test_mm_mask_fmul_pch() {
18753	let a = _mm_set1_pch(`0.0`, `1.0`);
18754	let b = _mm_set1_pch(`0.0`, `1.0`);
18755	let src = _mm_setr_ph(`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`);
18756	let r = _mm_mask_fmul_pch(src, `0b0101`, a, b);
18757	let e = _mm_setr_ph(`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`);
18758	assert_eq_m128h(r, e);
18759	}
18760
18761	#[simd_test(enable = "avx512fp16,avx512vl")]
18762	unsafe fn test_mm_maskz_fmul_pch() {
18763	let a = _mm_set1_pch(`0.0`, `1.0`);
18764	let b = _mm_set1_pch(`0.0`, `1.0`);
18765	let r = _mm_maskz_fmul_pch(`0b0101`, a, b);
18766	let e = _mm_setr_ph(`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`);
18767	assert_eq_m128h(r, e);
18768	}
18769
18770	#[simd_test(enable = "avx512fp16,avx512vl")]
18771	unsafe fn test_mm256_fmul_pch() {
18772	let a = _mm256_set1_pch(`0.0`, `1.0`);
18773	let b = _mm256_set1_pch(`0.0`, `1.0`);
18774	let r = _mm256_fmul_pch(a, b);
18775	let e = _mm256_set1_pch(`-1.0`, `0.0`);
18776	assert_eq_m256h(r, e);
18777	}
18778
18779	#[simd_test(enable = "avx512fp16,avx512vl")]
18780	unsafe fn test_mm256_mask_fmul_pch() {
18781	let a = _mm256_set1_pch(`0.0`, `1.0`);
18782	let b = _mm256_set1_pch(`0.0`, `1.0`);
18783	let src = _mm256_setr_ph(
18784	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18785	);
18786	let r = _mm256_mask_fmul_pch(src, `0b01010101`, a, b);
18787	let e = _mm256_setr_ph(
18788	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18789	);
18790	assert_eq_m256h(r, e);
18791	}
18792
18793	#[simd_test(enable = "avx512fp16,avx512vl")]
18794	unsafe fn test_mm256_maskz_fmul_pch() {
18795	let a = _mm256_set1_pch(`0.0`, `1.0`);
18796	let b = _mm256_set1_pch(`0.0`, `1.0`);
18797	let r = _mm256_maskz_fmul_pch(`0b01010101`, a, b);
18798	let e = _mm256_setr_ph(
18799	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18800	);
18801	assert_eq_m256h(r, e);
18802	}
18803
18804	#[simd_test(enable = "avx512fp16")]
18805	unsafe fn test_mm512_fmul_pch() {
18806	let a = _mm512_set1_pch(`0.0`, `1.0`);
18807	let b = _mm512_set1_pch(`0.0`, `1.0`);
18808	let r = _mm512_fmul_pch(a, b);
18809	let e = _mm512_set1_pch(`-1.0`, `0.0`);
18810	assert_eq_m512h(r, e);
18811	}
18812
18813	#[simd_test(enable = "avx512fp16")]
18814	unsafe fn test_mm512_mask_fmul_pch() {
18815	let a = _mm512_set1_pch(`0.0`, `1.0`);
18816	let b = _mm512_set1_pch(`0.0`, `1.0`);
18817	let src = _mm512_setr_ph(
18818	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18819	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
18820	`32.0`, `33.0`,
18821	);
18822	let r = _mm512_mask_fmul_pch(src, `0b0101010101010101`, a, b);
18823	let e = _mm512_setr_ph(
18824	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18825	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
18826	`33.0`,
18827	);
18828	assert_eq_m512h(r, e);
18829	}
18830
18831	#[simd_test(enable = "avx512fp16")]
18832	unsafe fn test_mm512_maskz_fmul_pch() {
18833	let a = _mm512_set1_pch(`0.0`, `1.0`);
18834	let b = _mm512_set1_pch(`0.0`, `1.0`);
18835	let r = _mm512_maskz_fmul_pch(`0b0101010101010101`, a, b);
18836	let e = _mm512_setr_ph(
18837	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18838	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18839	);
18840	assert_eq_m512h(r, e);
18841	}
18842
18843	#[simd_test(enable = "avx512fp16")]
18844	unsafe fn test_mm512_fmul_round_pch() {
18845	let a = _mm512_set1_pch(`0.0`, `1.0`);
18846	let b = _mm512_set1_pch(`0.0`, `1.0`);
18847	let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18848	let e = _mm512_set1_pch(`-1.0`, `0.0`);
18849	assert_eq_m512h(r, e);
18850	}
18851
18852	#[simd_test(enable = "avx512fp16")]
18853	unsafe fn test_mm512_mask_fmul_round_pch() {
18854	let a = _mm512_set1_pch(`0.0`, `1.0`);
18855	let b = _mm512_set1_pch(`0.0`, `1.0`);
18856	let src = _mm512_setr_ph(
18857	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18858	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
18859	`32.0`, `33.0`,
18860	);
18861	let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18862	src,
18863	`0b0101010101010101`,
18864	a,
18865	b,
18866	);
18867	let e = _mm512_setr_ph(
18868	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18869	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
18870	`33.0`,
18871	);
18872	assert_eq_m512h(r, e);
18873	}
18874
18875	#[simd_test(enable = "avx512fp16")]
18876	unsafe fn test_mm512_maskz_fmul_round_pch() {
18877	let a = _mm512_set1_pch(`0.0`, `1.0`);
18878	let b = _mm512_set1_pch(`0.0`, `1.0`);
18879	let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18880	`0b0101010101010101`,
18881	a,
18882	b,
18883	);
18884	let e = _mm512_setr_ph(
18885	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18886	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
18887	);
18888	assert_eq_m512h(r, e);
18889	}
18890
18891	#[simd_test(enable = "avx512fp16")]
18892	unsafe fn test_mm_fmul_round_sch() {
18893	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18894	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18895	let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
18896	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18897	assert_eq_m128h(r, e);
18898	}
18899
18900	#[simd_test(enable = "avx512fp16")]
18901	unsafe fn test_mm_mask_fmul_round_sch() {
18902	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18903	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18904	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
18905	let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
18906	src, `0`, a, b,
18907	);
18908	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18909	assert_eq_m128h(r, e);
18910	}
18911
18912	#[simd_test(enable = "avx512fp16")]
18913	unsafe fn test_mm_maskz_fmul_round_sch() {
18914	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18915	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18916	let r =
18917	_mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
18918	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18919	assert_eq_m128h(r, e);
18920	}
18921
18922	#[simd_test(enable = "avx512fp16")]
18923	unsafe fn test_mm_fmul_sch() {
18924	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18925	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18926	let r = _mm_fmul_sch(a, b);
18927	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18928	assert_eq_m128h(r, e);
18929	}
18930
18931	#[simd_test(enable = "avx512fp16")]
18932	unsafe fn test_mm_mask_fmul_sch() {
18933	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18934	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18935	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
18936	let r = _mm_mask_fmul_sch(src, `0`, a, b);
18937	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18938	assert_eq_m128h(r, e);
18939	}
18940
18941	#[simd_test(enable = "avx512fp16")]
18942	unsafe fn test_mm_maskz_fmul_sch() {
18943	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18944	let b = _mm_setr_ph(`0.0`, `1.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
18945	let r = _mm_maskz_fmul_sch(`0`, a, b);
18946	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
18947	assert_eq_m128h(r, e);
18948	}
18949
18950	#[simd_test(enable = "avx512fp16,avx512vl")]
18951	unsafe fn test_mm_cmul_pch() {
18952	let a = _mm_set1_pch(`0.0`, `1.0`);
18953	let b = _mm_set1_pch(`0.0`, `-1.0`);
18954	let r = _mm_cmul_pch(a, b);
18955	let e = _mm_set1_pch(`-1.0`, `0.0`);
18956	assert_eq_m128h(r, e);
18957	}
18958
18959	#[simd_test(enable = "avx512fp16,avx512vl")]
18960	unsafe fn test_mm_mask_cmul_pch() {
18961	let a = _mm_set1_pch(`0.0`, `1.0`);
18962	let b = _mm_set1_pch(`0.0`, `-1.0`);
18963	let src = _mm_setr_ph(`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`);
18964	let r = _mm_mask_cmul_pch(src, `0b0101`, a, b);
18965	let e = _mm_setr_ph(`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`);
18966	assert_eq_m128h(r, e);
18967	}
18968
18969	#[simd_test(enable = "avx512fp16,avx512vl")]
18970	unsafe fn test_mm_maskz_cmul_pch() {
18971	let a = _mm_set1_pch(`0.0`, `1.0`);
18972	let b = _mm_set1_pch(`0.0`, `-1.0`);
18973	let r = _mm_maskz_cmul_pch(`0b0101`, a, b);
18974	let e = _mm_setr_ph(`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`);
18975	assert_eq_m128h(r, e);
18976	}
18977
18978	#[simd_test(enable = "avx512fp16,avx512vl")]
18979	unsafe fn test_mm256_cmul_pch() {
18980	let a = _mm256_set1_pch(`0.0`, `1.0`);
18981	let b = _mm256_set1_pch(`0.0`, `-1.0`);
18982	let r = _mm256_cmul_pch(a, b);
18983	let e = _mm256_set1_pch(`-1.0`, `0.0`);
18984	assert_eq_m256h(r, e);
18985	}
18986
18987	#[simd_test(enable = "avx512fp16,avx512vl")]
18988	unsafe fn test_mm256_mask_cmul_pch() {
18989	let a = _mm256_set1_pch(`0.0`, `1.0`);
18990	let b = _mm256_set1_pch(`0.0`, `-1.0`);
18991	let src = _mm256_setr_ph(
18992	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
18993	);
18994	let r = _mm256_mask_cmul_pch(src, `0b01010101`, a, b);
18995	let e = _mm256_setr_ph(
18996	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
18997	);
18998	assert_eq_m256h(r, e);
18999	}
19000
19001	#[simd_test(enable = "avx512fp16,avx512vl")]
19002	unsafe fn test_mm256_maskz_cmul_pch() {
19003	let a = _mm256_set1_pch(`0.0`, `1.0`);
19004	let b = _mm256_set1_pch(`0.0`, `-1.0`);
19005	let r = _mm256_maskz_cmul_pch(`0b01010101`, a, b);
19006	let e = _mm256_setr_ph(
19007	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19008	);
19009	assert_eq_m256h(r, e);
19010	}
19011
19012	#[simd_test(enable = "avx512fp16")]
19013	unsafe fn test_mm512_cmul_pch() {
19014	let a = _mm512_set1_pch(`0.0`, `1.0`);
19015	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19016	let r = _mm512_cmul_pch(a, b);
19017	let e = _mm512_set1_pch(`-1.0`, `0.0`);
19018	assert_eq_m512h(r, e);
19019	}
19020
19021	#[simd_test(enable = "avx512fp16")]
19022	unsafe fn test_mm512_mask_cmul_pch() {
19023	let a = _mm512_set1_pch(`0.0`, `1.0`);
19024	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19025	let src = _mm512_setr_ph(
19026	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19027	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
19028	`32.0`, `33.0`,
19029	);
19030	let r = _mm512_mask_cmul_pch(src, `0b0101010101010101`, a, b);
19031	let e = _mm512_setr_ph(
19032	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
19033	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
19034	`33.0`,
19035	);
19036	assert_eq_m512h(r, e);
19037	}
19038
19039	#[simd_test(enable = "avx512fp16")]
19040	unsafe fn test_mm512_maskz_cmul_pch() {
19041	let a = _mm512_set1_pch(`0.0`, `1.0`);
19042	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19043	let r = _mm512_maskz_cmul_pch(`0b0101010101010101`, a, b);
19044	let e = _mm512_setr_ph(
19045	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19046	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19047	);
19048	assert_eq_m512h(r, e);
19049	}
19050
19051	#[simd_test(enable = "avx512fp16")]
19052	unsafe fn test_mm512_cmul_round_pch() {
19053	let a = _mm512_set1_pch(`0.0`, `1.0`);
19054	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19055	let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
19056	let e = _mm512_set1_pch(`-1.0`, `0.0`);
19057	assert_eq_m512h(r, e);
19058	}
19059
19060	#[simd_test(enable = "avx512fp16")]
19061	unsafe fn test_mm512_mask_cmul_round_pch() {
19062	let a = _mm512_set1_pch(`0.0`, `1.0`);
19063	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19064	let src = _mm512_setr_ph(
19065	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19066	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
19067	`32.0`, `33.0`,
19068	);
19069	let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19070	src,
19071	`0b0101010101010101`,
19072	a,
19073	b,
19074	);
19075	let e = _mm512_setr_ph(
19076	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
19077	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
19078	`33.0`,
19079	);
19080	assert_eq_m512h(r, e);
19081	}
19082
19083	#[simd_test(enable = "avx512fp16")]
19084	unsafe fn test_mm512_maskz_cmul_round_pch() {
19085	let a = _mm512_set1_pch(`0.0`, `1.0`);
19086	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19087	let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19088	`0b0101010101010101`,
19089	a,
19090	b,
19091	);
19092	let e = _mm512_setr_ph(
19093	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19094	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19095	);
19096	assert_eq_m512h(r, e);
19097	}
19098
19099	#[simd_test(enable = "avx512fp16")]
19100	unsafe fn test_mm_cmul_sch() {
19101	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19102	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19103	let r = _mm_cmul_sch(a, b);
19104	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19105	assert_eq_m128h(r, e);
19106	}
19107
19108	#[simd_test(enable = "avx512fp16")]
19109	unsafe fn test_mm_mask_cmul_sch() {
19110	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19111	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19112	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
19113	let r = _mm_mask_cmul_sch(src, `0`, a, b);
19114	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19115	assert_eq_m128h(r, e);
19116	}
19117
19118	#[simd_test(enable = "avx512fp16")]
19119	unsafe fn test_mm_maskz_cmul_sch() {
19120	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19121	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19122	let r = _mm_maskz_cmul_sch(`0`, a, b);
19123	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19124	assert_eq_m128h(r, e);
19125	}
19126
19127	#[simd_test(enable = "avx512fp16")]
19128	unsafe fn test_mm_cmul_round_sch() {
19129	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19130	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19131	let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
19132	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19133	assert_eq_m128h(r, e);
19134	}
19135
19136	#[simd_test(enable = "avx512fp16")]
19137	unsafe fn test_mm_mask_cmul_round_sch() {
19138	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19139	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19140	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
19141	let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19142	src, `0`, a, b,
19143	);
19144	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19145	assert_eq_m128h(r, e);
19146	}
19147
19148	#[simd_test(enable = "avx512fp16")]
19149	unsafe fn test_mm_maskz_cmul_round_sch() {
19150	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19151	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19152	let r =
19153	_mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
19154	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19155	assert_eq_m128h(r, e);
19156	}
19157
19158	#[simd_test(enable = "avx512fp16,avx512vl")]
19159	unsafe fn test_mm_fcmul_pch() {
19160	let a = _mm_set1_pch(`0.0`, `1.0`);
19161	let b = _mm_set1_pch(`0.0`, `-1.0`);
19162	let r = _mm_fcmul_pch(a, b);
19163	let e = _mm_set1_pch(`-1.0`, `0.0`);
19164	assert_eq_m128h(r, e);
19165	}
19166
19167	#[simd_test(enable = "avx512fp16,avx512vl")]
19168	unsafe fn test_mm_mask_fcmul_pch() {
19169	let a = _mm_set1_pch(`0.0`, `1.0`);
19170	let b = _mm_set1_pch(`0.0`, `-1.0`);
19171	let src = _mm_setr_ph(`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`);
19172	let r = _mm_mask_fcmul_pch(src, `0b0101`, a, b);
19173	let e = _mm_setr_ph(`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`);
19174	assert_eq_m128h(r, e);
19175	}
19176
19177	#[simd_test(enable = "avx512fp16,avx512vl")]
19178	unsafe fn test_mm_maskz_fcmul_pch() {
19179	let a = _mm_set1_pch(`0.0`, `1.0`);
19180	let b = _mm_set1_pch(`0.0`, `-1.0`);
19181	let r = _mm_maskz_fcmul_pch(`0b0101`, a, b);
19182	let e = _mm_setr_ph(`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`);
19183	assert_eq_m128h(r, e);
19184	}
19185
19186	#[simd_test(enable = "avx512fp16,avx512vl")]
19187	unsafe fn test_mm256_fcmul_pch() {
19188	let a = _mm256_set1_pch(`0.0`, `1.0`);
19189	let b = _mm256_set1_pch(`0.0`, `-1.0`);
19190	let r = _mm256_fcmul_pch(a, b);
19191	let e = _mm256_set1_pch(`-1.0`, `0.0`);
19192	assert_eq_m256h(r, e);
19193	}
19194
19195	#[simd_test(enable = "avx512fp16,avx512vl")]
19196	unsafe fn test_mm256_mask_fcmul_pch() {
19197	let a = _mm256_set1_pch(`0.0`, `1.0`);
19198	let b = _mm256_set1_pch(`0.0`, `-1.0`);
19199	let src = _mm256_setr_ph(
19200	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19201	);
19202	let r = _mm256_mask_fcmul_pch(src, `0b01010101`, a, b);
19203	let e = _mm256_setr_ph(
19204	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
19205	);
19206	assert_eq_m256h(r, e);
19207	}
19208
19209	#[simd_test(enable = "avx512fp16,avx512vl")]
19210	unsafe fn test_mm256_maskz_fcmul_pch() {
19211	let a = _mm256_set1_pch(`0.0`, `1.0`);
19212	let b = _mm256_set1_pch(`0.0`, `-1.0`);
19213	let r = _mm256_maskz_fcmul_pch(`0b01010101`, a, b);
19214	let e = _mm256_setr_ph(
19215	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19216	);
19217	assert_eq_m256h(r, e);
19218	}
19219
19220	#[simd_test(enable = "avx512fp16")]
19221	unsafe fn test_mm512_fcmul_pch() {
19222	let a = _mm512_set1_pch(`0.0`, `1.0`);
19223	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19224	let r = _mm512_fcmul_pch(a, b);
19225	let e = _mm512_set1_pch(`-1.0`, `0.0`);
19226	assert_eq_m512h(r, e);
19227	}
19228
19229	#[simd_test(enable = "avx512fp16")]
19230	unsafe fn test_mm512_mask_fcmul_pch() {
19231	let a = _mm512_set1_pch(`0.0`, `1.0`);
19232	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19233	let src = _mm512_setr_ph(
19234	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19235	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
19236	`32.0`, `33.0`,
19237	);
19238	let r = _mm512_mask_fcmul_pch(src, `0b0101010101010101`, a, b);
19239	let e = _mm512_setr_ph(
19240	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
19241	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
19242	`33.0`,
19243	);
19244	assert_eq_m512h(r, e);
19245	}
19246
19247	#[simd_test(enable = "avx512fp16")]
19248	unsafe fn test_mm512_maskz_fcmul_pch() {
19249	let a = _mm512_set1_pch(`0.0`, `1.0`);
19250	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19251	let r = _mm512_maskz_fcmul_pch(`0b0101010101010101`, a, b);
19252	let e = _mm512_setr_ph(
19253	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19254	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19255	);
19256	assert_eq_m512h(r, e);
19257	}
19258
19259	#[simd_test(enable = "avx512fp16")]
19260	unsafe fn test_mm512_fcmul_round_pch() {
19261	let a = _mm512_set1_pch(`0.0`, `1.0`);
19262	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19263	let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
19264	let e = _mm512_set1_pch(`-1.0`, `0.0`);
19265	assert_eq_m512h(r, e);
19266	}
19267
19268	#[simd_test(enable = "avx512fp16")]
19269	unsafe fn test_mm512_mask_fcmul_round_pch() {
19270	let a = _mm512_set1_pch(`0.0`, `1.0`);
19271	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19272	let src = _mm512_setr_ph(
19273	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19274	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
19275	`32.0`, `33.0`,
19276	);
19277	let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19278	src,
19279	`0b0101010101010101`,
19280	a,
19281	b,
19282	);
19283	let e = _mm512_setr_ph(
19284	`-1.0`, `0.0`, `4.0`, `5.0`, `-1.0`, `0.0`, `8.0`, `9.0`, `-1.0`, `0.0`, `12.0`, `13.0`, `-1.0`, `0.0`, `16.0`, `17.0`,
19285	`-1.0`, `0.0`, `20.0`, `21.0`, `-1.0`, `0.0`, `24.0`, `25.0`, `-1.0`, `0.0`, `28.0`, `29.0`, `-1.0`, `0.0`, `32.0`,
19286	`33.0`,
19287	);
19288	assert_eq_m512h(r, e);
19289	}
19290
19291	#[simd_test(enable = "avx512fp16")]
19292	unsafe fn test_mm512_maskz_fcmul_round_pch() {
19293	let a = _mm512_set1_pch(`0.0`, `1.0`);
19294	let b = _mm512_set1_pch(`0.0`, `-1.0`);
19295	let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19296	`0b0101010101010101`,
19297	a,
19298	b,
19299	);
19300	let e = _mm512_setr_ph(
19301	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19302	`-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`,
19303	);
19304	assert_eq_m512h(r, e);
19305	}
19306
19307	#[simd_test(enable = "avx512fp16")]
19308	unsafe fn test_mm_fcmul_sch() {
19309	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19310	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19311	let r = _mm_fcmul_sch(a, b);
19312	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19313	assert_eq_m128h(r, e);
19314	}
19315
19316	#[simd_test(enable = "avx512fp16")]
19317	unsafe fn test_mm_mask_fcmul_sch() {
19318	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19319	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19320	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
19321	let r = _mm_mask_fcmul_sch(src, `0`, a, b);
19322	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19323	assert_eq_m128h(r, e);
19324	}
19325
19326	#[simd_test(enable = "avx512fp16")]
19327	unsafe fn test_mm_maskz_fcmul_sch() {
19328	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19329	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19330	let r = _mm_maskz_fcmul_sch(`0`, a, b);
19331	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19332	assert_eq_m128h(r, e);
19333	}
19334
19335	#[simd_test(enable = "avx512fp16")]
19336	unsafe fn test_mm_fcmul_round_sch() {
19337	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19338	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19339	let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
19340	let e = _mm_setr_ph(`-1.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19341	assert_eq_m128h(r, e);
19342	}
19343
19344	#[simd_test(enable = "avx512fp16")]
19345	unsafe fn test_mm_mask_fcmul_round_sch() {
19346	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19347	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19348	let src = _mm_setr_ph(`14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`);
19349	let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19350	src, `0`, a, b,
19351	);
19352	let e = _mm_setr_ph(`14.0`, `15.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19353	assert_eq_m128h(r, e);
19354	}
19355
19356	#[simd_test(enable = "avx512fp16")]
19357	unsafe fn test_mm_maskz_fcmul_round_sch() {
19358	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19359	let b = _mm_setr_ph(`0.0`, `-1.0`, `8.0`, `-9.0`, `10.0`, `-11.0`, `12.0`, `-13.0`);
19360	let r =
19361	_mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
19362	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19363	assert_eq_m128h(r, e);
19364	}
19365
19366	#[simd_test(enable = "avx512fp16,avx512vl")]
19367	unsafe fn test_mm_abs_ph() {
19368	let a = _mm_set_ph(`-1.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `-4.0`, `5.0`, `-6.0`);
19369	let r = _mm_abs_ph(a);
19370	let e = _mm_set_ph(`1.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`);
19371	assert_eq_m128h(r, e);
19372	}
19373
19374	#[simd_test(enable = "avx512fp16,avx512vl")]
19375	unsafe fn test_mm256_abs_ph() {
19376	let a = _mm256_set_ph(
19377	`-1.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `-4.0`, `5.0`, `-6.0`, `7.0`, `-8.0`, `9.0`, `-10.0`, `11.0`, `-12.0`, `13.0`,
19378	`-14.0`,
19379	);
19380	let r = _mm256_abs_ph(a);
19381	let e = _mm256_set_ph(
19382	`1.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`,
19383	);
19384	assert_eq_m256h(r, e);
19385	}
19386
19387	#[simd_test(enable = "avx512fp16")]
19388	unsafe fn test_mm512_abs_ph() {
19389	let a = _mm512_set_ph(
19390	`-1.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `-4.0`, `5.0`, `-6.0`, `7.0`, `-8.0`, `9.0`, `-10.0`, `11.0`, `-12.0`, `13.0`,
19391	`-14.0`, `15.0`, `-16.0`, `17.0`, `-18.0`, `19.0`, `-20.0`, `21.0`, `-22.0`, `23.0`, `-24.0`, `25.0`, `-26.0`,
19392	`27.0`, `-28.0`, `29.0`, `-30.0`,
19393	);
19394	let r = _mm512_abs_ph(a);
19395	let e = _mm512_set_ph(
19396	`1.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`,
19397	`15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`,
19398	`29.0`, `30.0`,
19399	);
19400	assert_eq_m512h(r, e);
19401	}
19402
19403	#[simd_test(enable = "avx512fp16,avx512vl")]
19404	unsafe fn test_mm_conj_pch() {
19405	let a = _mm_set1_pch(`0.0`, `1.0`);
19406	let r = _mm_conj_pch(a);
19407	let e = _mm_set1_pch(`0.0`, `-1.0`);
19408	assert_eq_m128h(r, e);
19409	}
19410
19411	#[simd_test(enable = "avx512fp16,avx512vl")]
19412	unsafe fn test_mm_mask_conj_pch() {
19413	let a = _mm_set1_pch(`0.0`, `1.0`);
19414	let src = _mm_setr_ph(`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`);
19415	let r = _mm_mask_conj_pch(src, `0b0101`, a);
19416	let e = _mm_setr_ph(`0.0`, `-1.0`, `4.0`, `5.0`, `0.0`, `-1.0`, `8.0`, `9.0`);
19417	assert_eq_m128h(r, e);
19418	}
19419
19420	#[simd_test(enable = "avx512fp16,avx512vl")]
19421	unsafe fn test_mm_maskz_conj_pch() {
19422	let a = _mm_set1_pch(`0.0`, `1.0`);
19423	let r = _mm_maskz_conj_pch(`0b0101`, a);
19424	let e = _mm_setr_ph(`0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`);
19425	assert_eq_m128h(r, e);
19426	}
19427
19428	#[simd_test(enable = "avx512fp16,avx512vl")]
19429	unsafe fn test_mm256_conj_pch() {
19430	let a = _mm256_set1_pch(`0.0`, `1.0`);
19431	let r = _mm256_conj_pch(a);
19432	let e = _mm256_set1_pch(`0.0`, `-1.0`);
19433	assert_eq_m256h(r, e);
19434	}
19435
19436	#[simd_test(enable = "avx512fp16,avx512vl")]
19437	unsafe fn test_mm256_mask_conj_pch() {
19438	let a = _mm256_set1_pch(`0.0`, `1.0`);
19439	let src = _mm256_setr_ph(
19440	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19441	);
19442	let r = _mm256_mask_conj_pch(src, `0b01010101`, a);
19443	let e = _mm256_setr_ph(
19444	`0.0`, `-1.0`, `4.0`, `5.0`, `0.0`, `-1.0`, `8.0`, `9.0`, `0.0`, `-1.0`, `12.0`, `13.0`, `0.0`, `-1.0`, `16.0`, `17.0`,
19445	);
19446	assert_eq_m256h(r, e);
19447	}
19448
19449	#[simd_test(enable = "avx512fp16,avx512vl")]
19450	unsafe fn test_mm256_maskz_conj_pch() {
19451	let a = _mm256_set1_pch(`0.0`, `1.0`);
19452	let r = _mm256_maskz_conj_pch(`0b01010101`, a);
19453	let e = _mm256_setr_ph(
19454	`0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`,
19455	);
19456	assert_eq_m256h(r, e);
19457	}
19458
19459	#[simd_test(enable = "avx512fp16")]
19460	unsafe fn test_mm512_conj_pch() {
19461	let a = _mm512_set1_pch(`0.0`, `1.0`);
19462	let r = _mm512_conj_pch(a);
19463	let e = _mm512_set1_pch(`0.0`, `-1.0`);
19464	assert_eq_m512h(r, e);
19465	}
19466
19467	#[simd_test(enable = "avx512fp16")]
19468	unsafe fn test_mm512_mask_conj_pch() {
19469	let a = _mm512_set1_pch(`0.0`, `1.0`);
19470	let src = _mm512_setr_ph(
19471	`2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`,
19472	`18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`, `31.0`,
19473	`32.0`, `33.0`,
19474	);
19475	let r = _mm512_mask_conj_pch(src, `0b0101010101010101`, a);
19476	let e = _mm512_setr_ph(
19477	`0.0`, `-1.0`, `4.0`, `5.0`, `0.0`, `-1.0`, `8.0`, `9.0`, `0.0`, `-1.0`, `12.0`, `13.0`, `0.0`, `-1.0`, `16.0`, `17.0`,
19478	`0.0`, `-1.0`, `20.0`, `21.0`, `0.0`, `-1.0`, `24.0`, `25.0`, `0.0`, `-1.0`, `28.0`, `29.0`, `0.0`, `-1.0`, `32.0`,
19479	`33.0`,
19480	);
19481	assert_eq_m512h(r, e);
19482	}
19483
19484	#[simd_test(enable = "avx512fp16")]
19485	unsafe fn test_mm512_maskz_conj_pch() {
19486	let a = _mm512_set1_pch(`0.0`, `1.0`);
19487	let r = _mm512_maskz_conj_pch(`0b0101010101010101`, a);
19488	let e = _mm512_setr_ph(
19489	`0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`,
19490	`0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`, `0.0`, `-1.0`, `0.0`, `0.0`,
19491	);
19492	assert_eq_m512h(r, e);
19493	}
19494
19495	#[simd_test(enable = "avx512fp16,avx512vl")]
19496	unsafe fn test_mm_fmadd_pch() {
19497	let a = _mm_set1_pch(`0.0`, `1.0`);
19498	let b = _mm_set1_pch(`0.0`, `2.0`);
19499	let c = _mm_set1_pch(`0.0`, `3.0`);
19500	let r = _mm_fmadd_pch(a, b, c);
19501	let e = _mm_set1_pch(`-2.0`, `3.0`);
19502	assert_eq_m128h(r, e);
19503	}
19504
19505	#[simd_test(enable = "avx512fp16,avx512vl")]
19506	unsafe fn test_mm_mask_fmadd_pch() {
19507	let a = _mm_set1_pch(`0.0`, `1.0`);
19508	let b = _mm_set1_pch(`0.0`, `2.0`);
19509	let c = _mm_set1_pch(`0.0`, `3.0`);
19510	let r = _mm_mask_fmadd_pch(a, `0b0101`, b, c);
19511	let e = _mm_setr_ph(`-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`);
19512	assert_eq_m128h(r, e);
19513	}
19514
19515	#[simd_test(enable = "avx512fp16,avx512vl")]
19516	unsafe fn test_mm_mask3_fmadd_pch() {
19517	let a = _mm_set1_pch(`0.0`, `1.0`);
19518	let b = _mm_set1_pch(`0.0`, `2.0`);
19519	let c = _mm_set1_pch(`0.0`, `3.0`);
19520	let r = _mm_mask3_fmadd_pch(a, b, c, `0b0101`);
19521	let e = _mm_setr_ph(`-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`);
19522	assert_eq_m128h(r, e);
19523	}
19524
19525	#[simd_test(enable = "avx512fp16,avx512vl")]
19526	unsafe fn test_mm_maskz_fmadd_pch() {
19527	let a = _mm_set1_pch(`0.0`, `1.0`);
19528	let b = _mm_set1_pch(`0.0`, `2.0`);
19529	let c = _mm_set1_pch(`0.0`, `3.0`);
19530	let r = _mm_maskz_fmadd_pch(`0b0101`, a, b, c);
19531	let e = _mm_setr_ph(`-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`);
19532	assert_eq_m128h(r, e);
19533	}
19534
19535	#[simd_test(enable = "avx512fp16,avx512vl")]
19536	unsafe fn test_mm256_fmadd_pch() {
19537	let a = _mm256_set1_pch(`0.0`, `1.0`);
19538	let b = _mm256_set1_pch(`0.0`, `2.0`);
19539	let c = _mm256_set1_pch(`0.0`, `3.0`);
19540	let r = _mm256_fmadd_pch(a, b, c);
19541	let e = _mm256_set1_pch(`-2.0`, `3.0`);
19542	assert_eq_m256h(r, e);
19543	}
19544
19545	#[simd_test(enable = "avx512fp16,avx512vl")]
19546	unsafe fn test_mm256_mask_fmadd_pch() {
19547	let a = _mm256_set1_pch(`0.0`, `1.0`);
19548	let b = _mm256_set1_pch(`0.0`, `2.0`);
19549	let c = _mm256_set1_pch(`0.0`, `3.0`);
19550	let r = _mm256_mask_fmadd_pch(a, `0b01010101`, b, c);
19551	let e = _mm256_setr_ph(
19552	`-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`,
19553	);
19554	assert_eq_m256h(r, e);
19555	}
19556
19557	#[simd_test(enable = "avx512fp16,avx512vl")]
19558	unsafe fn test_mm256_mask3_fmadd_pch() {
19559	let a = _mm256_set1_pch(`0.0`, `1.0`);
19560	let b = _mm256_set1_pch(`0.0`, `2.0`);
19561	let c = _mm256_set1_pch(`0.0`, `3.0`);
19562	let r = _mm256_mask3_fmadd_pch(a, b, c, `0b01010101`);
19563	let e = _mm256_setr_ph(
19564	`-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`,
19565	);
19566	assert_eq_m256h(r, e);
19567	}
19568
19569	#[simd_test(enable = "avx512fp16,avx512vl")]
19570	unsafe fn test_mm256_maskz_fmadd_pch() {
19571	let a = _mm256_set1_pch(`0.0`, `1.0`);
19572	let b = _mm256_set1_pch(`0.0`, `2.0`);
19573	let c = _mm256_set1_pch(`0.0`, `3.0`);
19574	let r = _mm256_maskz_fmadd_pch(`0b01010101`, a, b, c);
19575	let e = _mm256_setr_ph(
19576	`-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`,
19577	);
19578	assert_eq_m256h(r, e);
19579	}
19580
19581	#[simd_test(enable = "avx512fp16")]
19582	unsafe fn test_mm512_fmadd_pch() {
19583	let a = _mm512_set1_pch(`0.0`, `1.0`);
19584	let b = _mm512_set1_pch(`0.0`, `2.0`);
19585	let c = _mm512_set1_pch(`0.0`, `3.0`);
19586	let r = _mm512_fmadd_pch(a, b, c);
19587	let e = _mm512_set1_pch(`-2.0`, `3.0`);
19588	assert_eq_m512h(r, e);
19589	}
19590
19591	#[simd_test(enable = "avx512fp16")]
19592	unsafe fn test_mm512_mask_fmadd_pch() {
19593	let a = _mm512_set1_pch(`0.0`, `1.0`);
19594	let b = _mm512_set1_pch(`0.0`, `2.0`);
19595	let c = _mm512_set1_pch(`0.0`, `3.0`);
19596	let r = _mm512_mask_fmadd_pch(a, `0b0101010101010101`, b, c);
19597	let e = _mm512_setr_ph(
19598	`-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`,
19599	`-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`,
19600	);
19601	assert_eq_m512h(r, e);
19602	}
19603
19604	#[simd_test(enable = "avx512fp16")]
19605	unsafe fn test_mm512_mask3_fmadd_pch() {
19606	let a = _mm512_set1_pch(`0.0`, `1.0`);
19607	let b = _mm512_set1_pch(`0.0`, `2.0`);
19608	let c = _mm512_set1_pch(`0.0`, `3.0`);
19609	let r = _mm512_mask3_fmadd_pch(a, b, c, `0b0101010101010101`);
19610	let e = _mm512_setr_ph(
19611	`-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`,
19612	`-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`,
19613	);
19614	assert_eq_m512h(r, e);
19615	}
19616
19617	#[simd_test(enable = "avx512fp16")]
19618	unsafe fn test_mm512_maskz_fmadd_pch() {
19619	let a = _mm512_set1_pch(`0.0`, `1.0`);
19620	let b = _mm512_set1_pch(`0.0`, `2.0`);
19621	let c = _mm512_set1_pch(`0.0`, `3.0`);
19622	let r = _mm512_maskz_fmadd_pch(`0b0101010101010101`, a, b, c);
19623	let e = _mm512_setr_ph(
19624	`-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`,
19625	`-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`,
19626	);
19627	assert_eq_m512h(r, e);
19628	}
19629
19630	#[simd_test(enable = "avx512fp16")]
19631	unsafe fn test_mm512_fmadd_round_pch() {
19632	let a = _mm512_set1_pch(`0.0`, `1.0`);
19633	let b = _mm512_set1_pch(`0.0`, `2.0`);
19634	let c = _mm512_set1_pch(`0.0`, `3.0`);
19635	let r =
19636	_mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
19637	let e = _mm512_set1_pch(`-2.0`, `3.0`);
19638	assert_eq_m512h(r, e);
19639	}
19640
19641	#[simd_test(enable = "avx512fp16")]
19642	unsafe fn test_mm512_mask_fmadd_round_pch() {
19643	let a = _mm512_set1_pch(`0.0`, `1.0`);
19644	let b = _mm512_set1_pch(`0.0`, `2.0`);
19645	let c = _mm512_set1_pch(`0.0`, `3.0`);
19646	let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19647	a,
19648	`0b0101010101010101`,
19649	b,
19650	c,
19651	);
19652	let e = _mm512_setr_ph(
19653	`-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`,
19654	`-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`, `-2.0`, `3.0`, `0.0`, `1.0`,
19655	);
19656	assert_eq_m512h(r, e);
19657	}
19658
19659	#[simd_test(enable = "avx512fp16")]
19660	unsafe fn test_mm512_mask3_fmadd_round_pch() {
19661	let a = _mm512_set1_pch(`0.0`, `1.0`);
19662	let b = _mm512_set1_pch(`0.0`, `2.0`);
19663	let c = _mm512_set1_pch(`0.0`, `3.0`);
19664	let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19665	a,
19666	b,
19667	c,
19668	`0b0101010101010101`,
19669	);
19670	let e = _mm512_setr_ph(
19671	`-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`,
19672	`-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`, `-2.0`, `3.0`, `0.0`, `3.0`,
19673	);
19674	assert_eq_m512h(r, e);
19675	}
19676
19677	#[simd_test(enable = "avx512fp16")]
19678	unsafe fn test_mm512_maskz_fmadd_round_pch() {
19679	let a = _mm512_set1_pch(`0.0`, `1.0`);
19680	let b = _mm512_set1_pch(`0.0`, `2.0`);
19681	let c = _mm512_set1_pch(`0.0`, `3.0`);
19682	let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19683	`0b0101010101010101`,
19684	a,
19685	b,
19686	c,
19687	);
19688	let e = _mm512_setr_ph(
19689	`-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`,
19690	`-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`, `-2.0`, `3.0`, `0.0`, `0.0`,
19691	);
19692	assert_eq_m512h(r, e);
19693	}
19694
19695	#[simd_test(enable = "avx512fp16")]
19696	unsafe fn test_mm_fmadd_sch() {
19697	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19698	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19699	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19700	let r = _mm_fmadd_sch(a, b, c);
19701	let e = _mm_setr_ph(`-2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19702	assert_eq_m128h(r, e);
19703	}
19704
19705	#[simd_test(enable = "avx512fp16")]
19706	unsafe fn test_mm_mask_fmadd_sch() {
19707	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19708	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19709	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19710	let r = _mm_mask_fmadd_sch(a, `0`, b, c);
19711	let e = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19712	assert_eq_m128h(r, e);
19713	let r = _mm_mask_fmadd_sch(a, `1`, b, c);
19714	let e = _mm_setr_ph(`-2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19715	assert_eq_m128h(r, e);
19716	}
19717
19718	#[simd_test(enable = "avx512fp16")]
19719	unsafe fn test_mm_mask3_fmadd_sch() {
19720	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19721	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19722	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19723	let r = _mm_mask3_fmadd_sch(a, b, c, `0`);
19724	let e = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19725	assert_eq_m128h(r, e);
19726	let r = _mm_mask3_fmadd_sch(a, b, c, `1`);
19727	let e = _mm_setr_ph(`-2.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19728	assert_eq_m128h(r, e);
19729	}
19730
19731	#[simd_test(enable = "avx512fp16")]
19732	unsafe fn test_mm_maskz_fmadd_sch() {
19733	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19734	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19735	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19736	let r = _mm_maskz_fmadd_sch(`0`, a, b, c);
19737	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19738	assert_eq_m128h(r, e);
19739	let r = _mm_maskz_fmadd_sch(`1`, a, b, c);
19740	let e = _mm_setr_ph(`-2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19741	assert_eq_m128h(r, e);
19742	}
19743
19744	#[simd_test(enable = "avx512fp16")]
19745	unsafe fn test_mm_fmadd_round_sch() {
19746	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19747	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19748	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19749	let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
19750	let e = _mm_setr_ph(`-2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19751	assert_eq_m128h(r, e);
19752	}
19753
19754	#[simd_test(enable = "avx512fp16")]
19755	unsafe fn test_mm_mask_fmadd_round_sch() {
19756	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19757	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19758	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19759	let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19760	a, `0`, b, c,
19761	);
19762	let e = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19763	assert_eq_m128h(r, e);
19764	let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19765	a, `1`, b, c,
19766	);
19767	let e = _mm_setr_ph(`-2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19768	assert_eq_m128h(r, e);
19769	}
19770
19771	#[simd_test(enable = "avx512fp16")]
19772	unsafe fn test_mm_mask3_fmadd_round_sch() {
19773	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19774	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19775	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19776	let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19777	a, b, c, `0`,
19778	);
19779	let e = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19780	assert_eq_m128h(r, e);
19781	let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19782	a, b, c, `1`,
19783	);
19784	let e = _mm_setr_ph(`-2.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19785	assert_eq_m128h(r, e);
19786	}
19787
19788	#[simd_test(enable = "avx512fp16")]
19789	unsafe fn test_mm_maskz_fmadd_round_sch() {
19790	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19791	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
19792	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
19793	let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19794	`0`, a, b, c,
19795	);
19796	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19797	assert_eq_m128h(r, e);
19798	let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19799	`1`, a, b, c,
19800	);
19801	let e = _mm_setr_ph(`-2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
19802	assert_eq_m128h(r, e);
19803	}
19804
19805	#[simd_test(enable = "avx512fp16,avx512vl")]
19806	unsafe fn test_mm_fcmadd_pch() {
19807	let a = _mm_set1_pch(`0.0`, `1.0`);
19808	let b = _mm_set1_pch(`0.0`, `2.0`);
19809	let c = _mm_set1_pch(`0.0`, `3.0`);
19810	let r = _mm_fcmadd_pch(a, b, c);
19811	let e = _mm_set1_pch(`2.0`, `3.0`);
19812	assert_eq_m128h(r, e);
19813	}
19814
19815	#[simd_test(enable = "avx512fp16,avx512vl")]
19816	unsafe fn test_mm_mask_fcmadd_pch() {
19817	let a = _mm_set1_pch(`0.0`, `1.0`);
19818	let b = _mm_set1_pch(`0.0`, `2.0`);
19819	let c = _mm_set1_pch(`0.0`, `3.0`);
19820	let r = _mm_mask_fcmadd_pch(a, `0b0101`, b, c);
19821	let e = _mm_setr_ph(`2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`);
19822	assert_eq_m128h(r, e);
19823	}
19824
19825	#[simd_test(enable = "avx512fp16,avx512vl")]
19826	unsafe fn test_mm_mask3_fcmadd_pch() {
19827	let a = _mm_set1_pch(`0.0`, `1.0`);
19828	let b = _mm_set1_pch(`0.0`, `2.0`);
19829	let c = _mm_set1_pch(`0.0`, `3.0`);
19830	let r = _mm_mask3_fcmadd_pch(a, b, c, `0b0101`);
19831	let e = _mm_setr_ph(`2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`);
19832	assert_eq_m128h(r, e);
19833	}
19834
19835	#[simd_test(enable = "avx512fp16,avx512vl")]
19836	unsafe fn test_mm_maskz_fcmadd_pch() {
19837	let a = _mm_set1_pch(`0.0`, `1.0`);
19838	let b = _mm_set1_pch(`0.0`, `2.0`);
19839	let c = _mm_set1_pch(`0.0`, `3.0`);
19840	let r = _mm_maskz_fcmadd_pch(`0b0101`, a, b, c);
19841	let e = _mm_setr_ph(`2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`);
19842	assert_eq_m128h(r, e);
19843	}
19844
19845	#[simd_test(enable = "avx512fp16,avx512vl")]
19846	unsafe fn test_mm256_fcmadd_pch() {
19847	let a = _mm256_set1_pch(`0.0`, `1.0`);
19848	let b = _mm256_set1_pch(`0.0`, `2.0`);
19849	let c = _mm256_set1_pch(`0.0`, `3.0`);
19850	let r = _mm256_fcmadd_pch(a, b, c);
19851	let e = _mm256_set1_pch(`2.0`, `3.0`);
19852	assert_eq_m256h(r, e);
19853	}
19854
19855	#[simd_test(enable = "avx512fp16,avx512vl")]
19856	unsafe fn test_mm256_mask_fcmadd_pch() {
19857	let a = _mm256_set1_pch(`0.0`, `1.0`);
19858	let b = _mm256_set1_pch(`0.0`, `2.0`);
19859	let c = _mm256_set1_pch(`0.0`, `3.0`);
19860	let r = _mm256_mask_fcmadd_pch(a, `0b01010101`, b, c);
19861	let e = _mm256_setr_ph(
19862	`2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`,
19863	);
19864	assert_eq_m256h(r, e);
19865	}
19866
19867	#[simd_test(enable = "avx512fp16,avx512vl")]
19868	unsafe fn test_mm256_mask3_fcmadd_pch() {
19869	let a = _mm256_set1_pch(`0.0`, `1.0`);
19870	let b = _mm256_set1_pch(`0.0`, `2.0`);
19871	let c = _mm256_set1_pch(`0.0`, `3.0`);
19872	let r = _mm256_mask3_fcmadd_pch(a, b, c, `0b01010101`);
19873	let e = _mm256_setr_ph(
19874	`2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`,
19875	);
19876	assert_eq_m256h(r, e);
19877	}
19878
19879	#[simd_test(enable = "avx512fp16,avx512vl")]
19880	unsafe fn test_mm256_maskz_fcmadd_pch() {
19881	let a = _mm256_set1_pch(`0.0`, `1.0`);
19882	let b = _mm256_set1_pch(`0.0`, `2.0`);
19883	let c = _mm256_set1_pch(`0.0`, `3.0`);
19884	let r = _mm256_maskz_fcmadd_pch(`0b01010101`, a, b, c);
19885	let e = _mm256_setr_ph(
19886	`2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`,
19887	);
19888	assert_eq_m256h(r, e);
19889	}
19890
19891	#[simd_test(enable = "avx512fp16")]
19892	unsafe fn test_mm512_fcmadd_pch() {
19893	let a = _mm512_set1_pch(`0.0`, `1.0`);
19894	let b = _mm512_set1_pch(`0.0`, `2.0`);
19895	let c = _mm512_set1_pch(`0.0`, `3.0`);
19896	let r = _mm512_fcmadd_pch(a, b, c);
19897	let e = _mm512_set1_pch(`2.0`, `3.0`);
19898	assert_eq_m512h(r, e);
19899	}
19900
19901	#[simd_test(enable = "avx512fp16")]
19902	unsafe fn test_mm512_mask_fcmadd_pch() {
19903	let a = _mm512_set1_pch(`0.0`, `1.0`);
19904	let b = _mm512_set1_pch(`0.0`, `2.0`);
19905	let c = _mm512_set1_pch(`0.0`, `3.0`);
19906	let r = _mm512_mask_fcmadd_pch(a, `0b0101010101010101`, b, c);
19907	let e = _mm512_setr_ph(
19908	`2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`,
19909	`3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`,
19910	);
19911	assert_eq_m512h(r, e);
19912	}
19913
19914	#[simd_test(enable = "avx512fp16")]
19915	unsafe fn test_mm512_mask3_fcmadd_pch() {
19916	let a = _mm512_set1_pch(`0.0`, `1.0`);
19917	let b = _mm512_set1_pch(`0.0`, `2.0`);
19918	let c = _mm512_set1_pch(`0.0`, `3.0`);
19919	let r = _mm512_mask3_fcmadd_pch(a, b, c, `0b0101010101010101`);
19920	let e = _mm512_setr_ph(
19921	`2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`,
19922	`3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`,
19923	);
19924	assert_eq_m512h(r, e);
19925	}
19926
19927	#[simd_test(enable = "avx512fp16")]
19928	unsafe fn test_mm512_maskz_fcmadd_pch() {
19929	let a = _mm512_set1_pch(`0.0`, `1.0`);
19930	let b = _mm512_set1_pch(`0.0`, `2.0`);
19931	let c = _mm512_set1_pch(`0.0`, `3.0`);
19932	let r = _mm512_maskz_fcmadd_pch(`0b0101010101010101`, a, b, c);
19933	let e = _mm512_setr_ph(
19934	`2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`,
19935	`3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`,
19936	);
19937	assert_eq_m512h(r, e);
19938	}
19939
19940	#[simd_test(enable = "avx512fp16")]
19941	unsafe fn test_mm512_fcmadd_round_pch() {
19942	let a = _mm512_set1_pch(`0.0`, `1.0`);
19943	let b = _mm512_set1_pch(`0.0`, `2.0`);
19944	let c = _mm512_set1_pch(`0.0`, `3.0`);
19945	let r =
19946	_mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
19947	let e = _mm512_set1_pch(`2.0`, `3.0`);
19948	assert_eq_m512h(r, e);
19949	}
19950
19951	#[simd_test(enable = "avx512fp16")]
19952	unsafe fn test_mm512_mask_fcmadd_round_pch() {
19953	let a = _mm512_set1_pch(`0.0`, `1.0`);
19954	let b = _mm512_set1_pch(`0.0`, `2.0`);
19955	let c = _mm512_set1_pch(`0.0`, `3.0`);
19956	let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19957	a,
19958	`0b0101010101010101`,
19959	b,
19960	c,
19961	);
19962	let e = _mm512_setr_ph(
19963	`2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`,
19964	`3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`, `2.0`, `3.0`, `0.0`, `1.0`,
19965	);
19966	assert_eq_m512h(r, e);
19967	}
19968
19969	#[simd_test(enable = "avx512fp16")]
19970	unsafe fn test_mm512_mask3_fcmadd_round_pch() {
19971	let a = _mm512_set1_pch(`0.0`, `1.0`);
19972	let b = _mm512_set1_pch(`0.0`, `2.0`);
19973	let c = _mm512_set1_pch(`0.0`, `3.0`);
19974	let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19975	a,
19976	b,
19977	c,
19978	`0b0101010101010101`,
19979	);
19980	let e = _mm512_setr_ph(
19981	`2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`,
19982	`3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`, `2.0`, `3.0`, `0.0`, `3.0`,
19983	);
19984	assert_eq_m512h(r, e);
19985	}
19986
19987	#[simd_test(enable = "avx512fp16")]
19988	unsafe fn test_mm512_maskz_fcmadd_round_pch() {
19989	let a = _mm512_set1_pch(`0.0`, `1.0`);
19990	let b = _mm512_set1_pch(`0.0`, `2.0`);
19991	let c = _mm512_set1_pch(`0.0`, `3.0`);
19992	let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
19993	`0b0101010101010101`,
19994	a,
19995	b,
19996	c,
19997	);
19998	let e = _mm512_setr_ph(
19999	`2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`,
20000	`3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`, `2.0`, `3.0`, `0.0`, `0.0`,
20001	);
20002	assert_eq_m512h(r, e);
20003	}
20004
20005	#[simd_test(enable = "avx512fp16")]
20006	unsafe fn test_mm_fcmadd_sch() {
20007	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20008	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20009	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20010	let r = _mm_fcmadd_sch(a, b, c);
20011	let e = _mm_setr_ph(`2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20012	assert_eq_m128h(r, e);
20013	}
20014
20015	#[simd_test(enable = "avx512fp16")]
20016	unsafe fn test_mm_mask_fcmadd_sch() {
20017	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20018	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20019	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20020	let r = _mm_mask_fcmadd_sch(a, `0`, b, c);
20021	let e = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20022	assert_eq_m128h(r, e);
20023	let r = _mm_mask_fcmadd_sch(a, `1`, b, c);
20024	let e = _mm_setr_ph(`2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20025	assert_eq_m128h(r, e);
20026	}
20027
20028	#[simd_test(enable = "avx512fp16")]
20029	unsafe fn test_mm_mask3_fcmadd_sch() {
20030	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20031	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20032	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20033	let r = _mm_mask3_fcmadd_sch(a, b, c, `0`);
20034	let e = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20035	assert_eq_m128h(r, e);
20036	let r = _mm_mask3_fcmadd_sch(a, b, c, `1`);
20037	let e = _mm_setr_ph(`2.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20038	assert_eq_m128h(r, e);
20039	}
20040
20041	#[simd_test(enable = "avx512fp16")]
20042	unsafe fn test_mm_maskz_fcmadd_sch() {
20043	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20044	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20045	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20046	let r = _mm_maskz_fcmadd_sch(`0`, a, b, c);
20047	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20048	assert_eq_m128h(r, e);
20049	let r = _mm_maskz_fcmadd_sch(`1`, a, b, c);
20050	let e = _mm_setr_ph(`2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20051	assert_eq_m128h(r, e);
20052	}
20053
20054	#[simd_test(enable = "avx512fp16")]
20055	unsafe fn test_mm_fcmadd_round_sch() {
20056	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20057	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20058	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20059	let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20060	let e = _mm_setr_ph(`2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20061	assert_eq_m128h(r, e);
20062	}
20063
20064	#[simd_test(enable = "avx512fp16")]
20065	unsafe fn test_mm_mask_fcmadd_round_sch() {
20066	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20067	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20068	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20069	let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20070	a, `0`, b, c,
20071	);
20072	let e = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20073	assert_eq_m128h(r, e);
20074	let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20075	a, `1`, b, c,
20076	);
20077	let e = _mm_setr_ph(`2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20078	assert_eq_m128h(r, e);
20079	}
20080
20081	#[simd_test(enable = "avx512fp16")]
20082	unsafe fn test_mm_mask3_fcmadd_round_sch() {
20083	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20084	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20085	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20086	let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20087	a, b, c, `0`,
20088	);
20089	let e = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20090	assert_eq_m128h(r, e);
20091	let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20092	a, b, c, `1`,
20093	);
20094	let e = _mm_setr_ph(`2.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20095	assert_eq_m128h(r, e);
20096	}
20097
20098	#[simd_test(enable = "avx512fp16")]
20099	unsafe fn test_mm_maskz_fcmadd_round_sch() {
20100	let a = _mm_setr_ph(`0.0`, `1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20101	let b = _mm_setr_ph(`0.0`, `2.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`);
20102	let c = _mm_setr_ph(`0.0`, `3.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`);
20103	let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20104	`0`, a, b, c,
20105	);
20106	let e = _mm_setr_ph(`0.0`, `0.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20107	assert_eq_m128h(r, e);
20108	let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20109	`1`, a, b, c,
20110	);
20111	let e = _mm_setr_ph(`2.0`, `3.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`);
20112	assert_eq_m128h(r, e);
20113	}
20114
20115	#[simd_test(enable = "avx512fp16,avx512vl")]
20116	unsafe fn test_mm_fmadd_ph() {
20117	let a = _mm_set1_ph(`1.0`);
20118	let b = _mm_set1_ph(`2.0`);
20119	let c = _mm_set1_ph(`3.0`);
20120	let r = _mm_fmadd_ph(a, b, c);
20121	let e = _mm_set1_ph(`5.0`);
20122	assert_eq_m128h(r, e);
20123	}
20124
20125	#[simd_test(enable = "avx512fp16,avx512vl")]
20126	unsafe fn test_mm_mask_fmadd_ph() {
20127	let a = _mm_set1_ph(`1.0`);
20128	let b = _mm_set1_ph(`2.0`);
20129	let c = _mm_set1_ph(`3.0`);
20130	let r = _mm_mask_fmadd_ph(a, `0b01010101`, b, c);
20131	let e = _mm_set_ph(`1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`);
20132	assert_eq_m128h(r, e);
20133	}
20134
20135	#[simd_test(enable = "avx512fp16,avx512vl")]
20136	unsafe fn test_mm_mask3_fmadd_ph() {
20137	let a = _mm_set1_ph(`1.0`);
20138	let b = _mm_set1_ph(`2.0`);
20139	let c = _mm_set1_ph(`3.0`);
20140	let r = _mm_mask3_fmadd_ph(a, b, c, `0b01010101`);
20141	let e = _mm_set_ph(`3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`);
20142	assert_eq_m128h(r, e);
20143	}
20144
20145	#[simd_test(enable = "avx512fp16,avx512vl")]
20146	unsafe fn test_mm_maskz_fmadd_ph() {
20147	let a = _mm_set1_ph(`1.0`);
20148	let b = _mm_set1_ph(`2.0`);
20149	let c = _mm_set1_ph(`3.0`);
20150	let r = _mm_maskz_fmadd_ph(`0b01010101`, a, b, c);
20151	let e = _mm_set_ph(`0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`);
20152	assert_eq_m128h(r, e);
20153	}
20154
20155	#[simd_test(enable = "avx512fp16,avx512vl")]
20156	unsafe fn test_mm256_fmadd_ph() {
20157	let a = _mm256_set1_ph(`1.0`);
20158	let b = _mm256_set1_ph(`2.0`);
20159	let c = _mm256_set1_ph(`3.0`);
20160	let r = _mm256_fmadd_ph(a, b, c);
20161	let e = _mm256_set1_ph(`5.0`);
20162	assert_eq_m256h(r, e);
20163	}
20164
20165	#[simd_test(enable = "avx512fp16,avx512vl")]
20166	unsafe fn test_mm256_mask_fmadd_ph() {
20167	let a = _mm256_set1_ph(`1.0`);
20168	let b = _mm256_set1_ph(`2.0`);
20169	let c = _mm256_set1_ph(`3.0`);
20170	let r = _mm256_mask_fmadd_ph(a, `0b0101010101010101`, b, c);
20171	let e = _mm256_set_ph(
20172	`1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`,
20173	);
20174	assert_eq_m256h(r, e);
20175	}
20176
20177	#[simd_test(enable = "avx512fp16,avx512vl")]
20178	unsafe fn test_mm256_mask3_fmadd_ph() {
20179	let a = _mm256_set1_ph(`1.0`);
20180	let b = _mm256_set1_ph(`2.0`);
20181	let c = _mm256_set1_ph(`3.0`);
20182	let r = _mm256_mask3_fmadd_ph(a, b, c, `0b0101010101010101`);
20183	let e = _mm256_set_ph(
20184	`3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`,
20185	);
20186	assert_eq_m256h(r, e);
20187	}
20188
20189	#[simd_test(enable = "avx512fp16,avx512vl")]
20190	unsafe fn test_mm256_maskz_fmadd_ph() {
20191	let a = _mm256_set1_ph(`1.0`);
20192	let b = _mm256_set1_ph(`2.0`);
20193	let c = _mm256_set1_ph(`3.0`);
20194	let r = _mm256_maskz_fmadd_ph(`0b0101010101010101`, a, b, c);
20195	let e = _mm256_set_ph(
20196	`0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`,
20197	);
20198	assert_eq_m256h(r, e);
20199	}
20200
20201	#[simd_test(enable = "avx512fp16")]
20202	unsafe fn test_mm512_fmadd_ph() {
20203	let a = _mm512_set1_ph(`1.0`);
20204	let b = _mm512_set1_ph(`2.0`);
20205	let c = _mm512_set1_ph(`3.0`);
20206	let r = _mm512_fmadd_ph(a, b, c);
20207	let e = _mm512_set1_ph(`5.0`);
20208	assert_eq_m512h(r, e);
20209	}
20210
20211	#[simd_test(enable = "avx512fp16")]
20212	unsafe fn test_mm512_mask_fmadd_ph() {
20213	let a = _mm512_set1_ph(`1.0`);
20214	let b = _mm512_set1_ph(`2.0`);
20215	let c = _mm512_set1_ph(`3.0`);
20216	let r = _mm512_mask_fmadd_ph(a, `0b01010101010101010101010101010101`, b, c);
20217	let e = _mm512_set_ph(
20218	`1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`,
20219	`5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`,
20220	);
20221	assert_eq_m512h(r, e);
20222	}
20223
20224	#[simd_test(enable = "avx512fp16")]
20225	unsafe fn test_mm512_mask3_fmadd_ph() {
20226	let a = _mm512_set1_ph(`1.0`);
20227	let b = _mm512_set1_ph(`2.0`);
20228	let c = _mm512_set1_ph(`3.0`);
20229	let r = _mm512_mask3_fmadd_ph(a, b, c, `0b01010101010101010101010101010101`);
20230	let e = _mm512_set_ph(
20231	`3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`,
20232	`5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`,
20233	);
20234	assert_eq_m512h(r, e);
20235	}
20236
20237	#[simd_test(enable = "avx512fp16")]
20238	unsafe fn test_mm512_maskz_fmadd_ph() {
20239	let a = _mm512_set1_ph(`1.0`);
20240	let b = _mm512_set1_ph(`2.0`);
20241	let c = _mm512_set1_ph(`3.0`);
20242	let r = _mm512_maskz_fmadd_ph(`0b01010101010101010101010101010101`, a, b, c);
20243	let e = _mm512_set_ph(
20244	`0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`,
20245	`5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`,
20246	);
20247	assert_eq_m512h(r, e);
20248	}
20249
20250	#[simd_test(enable = "avx512fp16")]
20251	unsafe fn test_mm512_fmadd_round_ph() {
20252	let a = _mm512_set1_ph(`1.0`);
20253	let b = _mm512_set1_ph(`2.0`);
20254	let c = _mm512_set1_ph(`3.0`);
20255	let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20256	let e = _mm512_set1_ph(`5.0`);
20257	assert_eq_m512h(r, e);
20258	}
20259
20260	#[simd_test(enable = "avx512fp16")]
20261	unsafe fn test_mm512_mask_fmadd_round_ph() {
20262	let a = _mm512_set1_ph(`1.0`);
20263	let b = _mm512_set1_ph(`2.0`);
20264	let c = _mm512_set1_ph(`3.0`);
20265	let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20266	a,
20267	`0b01010101010101010101010101010101`,
20268	b,
20269	c,
20270	);
20271	let e = _mm512_set_ph(
20272	`1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`,
20273	`5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`, `1.0`, `5.0`,
20274	);
20275	assert_eq_m512h(r, e);
20276	}
20277
20278	#[simd_test(enable = "avx512fp16")]
20279	unsafe fn test_mm512_mask3_fmadd_round_ph() {
20280	let a = _mm512_set1_ph(`1.0`);
20281	let b = _mm512_set1_ph(`2.0`);
20282	let c = _mm512_set1_ph(`3.0`);
20283	let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20284	a,
20285	b,
20286	c,
20287	`0b01010101010101010101010101010101`,
20288	);
20289	let e = _mm512_set_ph(
20290	`3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`,
20291	`5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`, `3.0`, `5.0`,
20292	);
20293	assert_eq_m512h(r, e);
20294	}
20295
20296	#[simd_test(enable = "avx512fp16")]
20297	unsafe fn test_mm512_maskz_fmadd_round_ph() {
20298	let a = _mm512_set1_ph(`1.0`);
20299	let b = _mm512_set1_ph(`2.0`);
20300	let c = _mm512_set1_ph(`3.0`);
20301	let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20302	`0b01010101010101010101010101010101`,
20303	a,
20304	b,
20305	c,
20306	);
20307	let e = _mm512_set_ph(
20308	`0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`,
20309	`5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`, `0.0`, `5.0`,
20310	);
20311	assert_eq_m512h(r, e);
20312	}
20313
20314	#[simd_test(enable = "avx512fp16")]
20315	unsafe fn test_mm_fmadd_sh() {
20316	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20317	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20318	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20319	let r = _mm_fmadd_sh(a, b, c);
20320	let e = _mm_setr_ph(`5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20321	assert_eq_m128h(r, e);
20322	}
20323
20324	#[simd_test(enable = "avx512fp16")]
20325	unsafe fn test_mm_mask_fmadd_sh() {
20326	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20327	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20328	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20329	let r = _mm_mask_fmadd_sh(a, `0`, b, c);
20330	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20331	assert_eq_m128h(r, e);
20332	let r = _mm_mask_fmadd_sh(a, `1`, b, c);
20333	let e = _mm_setr_ph(`5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20334	assert_eq_m128h(r, e);
20335	}
20336
20337	#[simd_test(enable = "avx512fp16")]
20338	unsafe fn test_mm_mask3_fmadd_sh() {
20339	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20340	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20341	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20342	let r = _mm_mask3_fmadd_sh(a, b, c, `0`);
20343	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20344	assert_eq_m128h(r, e);
20345	let r = _mm_mask3_fmadd_sh(a, b, c, `1`);
20346	let e = _mm_setr_ph(`5.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20347	assert_eq_m128h(r, e);
20348	}
20349
20350	#[simd_test(enable = "avx512fp16")]
20351	unsafe fn test_mm_maskz_fmadd_sh() {
20352	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20353	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20354	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20355	let r = _mm_maskz_fmadd_sh(`0`, a, b, c);
20356	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20357	assert_eq_m128h(r, e);
20358	let r = _mm_maskz_fmadd_sh(`1`, a, b, c);
20359	let e = _mm_setr_ph(`5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20360	assert_eq_m128h(r, e);
20361	}
20362
20363	#[simd_test(enable = "avx512fp16")]
20364	unsafe fn test_mm_fmadd_round_sh() {
20365	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20366	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20367	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20368	let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20369	let e = _mm_setr_ph(`5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20370	assert_eq_m128h(r, e);
20371	}
20372
20373	#[simd_test(enable = "avx512fp16")]
20374	unsafe fn test_mm_mask_fmadd_round_sh() {
20375	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20376	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20377	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20378	let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20379	a, `0`, b, c,
20380	);
20381	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20382	assert_eq_m128h(r, e);
20383	let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20384	a, `1`, b, c,
20385	);
20386	let e = _mm_setr_ph(`5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20387	assert_eq_m128h(r, e);
20388	}
20389
20390	#[simd_test(enable = "avx512fp16")]
20391	unsafe fn test_mm_mask3_fmadd_round_sh() {
20392	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20393	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20394	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20395	let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20396	a, b, c, `0`,
20397	);
20398	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20399	assert_eq_m128h(r, e);
20400	let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20401	a, b, c, `1`,
20402	);
20403	let e = _mm_setr_ph(`5.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20404	assert_eq_m128h(r, e);
20405	}
20406
20407	#[simd_test(enable = "avx512fp16")]
20408	unsafe fn test_mm_maskz_fmadd_round_sh() {
20409	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20410	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20411	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20412	let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20413	`0`, a, b, c,
20414	);
20415	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20416	assert_eq_m128h(r, e);
20417	let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20418	`1`, a, b, c,
20419	);
20420	let e = _mm_setr_ph(`5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20421	assert_eq_m128h(r, e);
20422	}
20423
20424	#[simd_test(enable = "avx512fp16,avx512vl")]
20425	unsafe fn test_mm_fmsub_ph() {
20426	let a = _mm_set1_ph(`1.0`);
20427	let b = _mm_set1_ph(`2.0`);
20428	let c = _mm_set1_ph(`3.0`);
20429	let r = _mm_fmsub_ph(a, b, c);
20430	let e = _mm_set1_ph(`-1.0`);
20431	assert_eq_m128h(r, e);
20432	}
20433
20434	#[simd_test(enable = "avx512fp16,avx512vl")]
20435	unsafe fn test_mm_mask_fmsub_ph() {
20436	let a = _mm_set1_ph(`1.0`);
20437	let b = _mm_set1_ph(`2.0`);
20438	let c = _mm_set1_ph(`3.0`);
20439	let r = _mm_mask_fmsub_ph(a, `0b01010101`, b, c);
20440	let e = _mm_set_ph(`1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`);
20441	assert_eq_m128h(r, e);
20442	}
20443
20444	#[simd_test(enable = "avx512fp16,avx512vl")]
20445	unsafe fn test_mm_mask3_fmsub_ph() {
20446	let a = _mm_set1_ph(`1.0`);
20447	let b = _mm_set1_ph(`2.0`);
20448	let c = _mm_set1_ph(`3.0`);
20449	let r = _mm_mask3_fmsub_ph(a, b, c, `0b01010101`);
20450	let e = _mm_set_ph(`3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`);
20451	assert_eq_m128h(r, e);
20452	}
20453
20454	#[simd_test(enable = "avx512fp16,avx512vl")]
20455	unsafe fn test_mm_maskz_fmsub_ph() {
20456	let a = _mm_set1_ph(`1.0`);
20457	let b = _mm_set1_ph(`2.0`);
20458	let c = _mm_set1_ph(`3.0`);
20459	let r = _mm_maskz_fmsub_ph(`0b01010101`, a, b, c);
20460	let e = _mm_set_ph(`0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`);
20461	assert_eq_m128h(r, e);
20462	}
20463
20464	#[simd_test(enable = "avx512fp16,avx512vl")]
20465	unsafe fn test_mm256_fmsub_ph() {
20466	let a = _mm256_set1_ph(`1.0`);
20467	let b = _mm256_set1_ph(`2.0`);
20468	let c = _mm256_set1_ph(`3.0`);
20469	let r = _mm256_fmsub_ph(a, b, c);
20470	let e = _mm256_set1_ph(`-1.0`);
20471	assert_eq_m256h(r, e);
20472	}
20473
20474	#[simd_test(enable = "avx512fp16,avx512vl")]
20475	unsafe fn test_mm256_mask_fmsub_ph() {
20476	let a = _mm256_set1_ph(`1.0`);
20477	let b = _mm256_set1_ph(`2.0`);
20478	let c = _mm256_set1_ph(`3.0`);
20479	let r = _mm256_mask_fmsub_ph(a, `0b0101010101010101`, b, c);
20480	let e = _mm256_set_ph(
20481	`1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`,
20482	);
20483	assert_eq_m256h(r, e);
20484	}
20485
20486	#[simd_test(enable = "avx512fp16,avx512vl")]
20487	unsafe fn test_mm256_mask3_fmsub_ph() {
20488	let a = _mm256_set1_ph(`1.0`);
20489	let b = _mm256_set1_ph(`2.0`);
20490	let c = _mm256_set1_ph(`3.0`);
20491	let r = _mm256_mask3_fmsub_ph(a, b, c, `0b0101010101010101`);
20492	let e = _mm256_set_ph(
20493	`3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`,
20494	);
20495	assert_eq_m256h(r, e);
20496	}
20497
20498	#[simd_test(enable = "avx512fp16,avx512vl")]
20499	unsafe fn test_mm256_maskz_fmsub_ph() {
20500	let a = _mm256_set1_ph(`1.0`);
20501	let b = _mm256_set1_ph(`2.0`);
20502	let c = _mm256_set1_ph(`3.0`);
20503	let r = _mm256_maskz_fmsub_ph(`0b0101010101010101`, a, b, c);
20504	let e = _mm256_set_ph(
20505	`0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`,
20506	);
20507	assert_eq_m256h(r, e);
20508	}
20509
20510	#[simd_test(enable = "avx512fp16")]
20511	unsafe fn test_mm512_fmsub_ph() {
20512	let a = _mm512_set1_ph(`1.0`);
20513	let b = _mm512_set1_ph(`2.0`);
20514	let c = _mm512_set1_ph(`3.0`);
20515	let r = _mm512_fmsub_ph(a, b, c);
20516	let e = _mm512_set1_ph(`-1.0`);
20517	assert_eq_m512h(r, e);
20518	}
20519
20520	#[simd_test(enable = "avx512fp16")]
20521	unsafe fn test_mm512_mask_fmsub_ph() {
20522	let a = _mm512_set1_ph(`1.0`);
20523	let b = _mm512_set1_ph(`2.0`);
20524	let c = _mm512_set1_ph(`3.0`);
20525	let r = _mm512_mask_fmsub_ph(a, `0b01010101010101010101010101010101`, b, c);
20526	let e = _mm512_set_ph(
20527	`1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`,
20528	`1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`,
20529	);
20530	assert_eq_m512h(r, e);
20531	}
20532
20533	#[simd_test(enable = "avx512fp16")]
20534	unsafe fn test_mm512_mask3_fmsub_ph() {
20535	let a = _mm512_set1_ph(`1.0`);
20536	let b = _mm512_set1_ph(`2.0`);
20537	let c = _mm512_set1_ph(`3.0`);
20538	let r = _mm512_mask3_fmsub_ph(a, b, c, `0b01010101010101010101010101010101`);
20539	let e = _mm512_set_ph(
20540	`3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`,
20541	`3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`,
20542	);
20543	assert_eq_m512h(r, e);
20544	}
20545
20546	#[simd_test(enable = "avx512fp16")]
20547	unsafe fn test_mm512_maskz_fmsub_ph() {
20548	let a = _mm512_set1_ph(`1.0`);
20549	let b = _mm512_set1_ph(`2.0`);
20550	let c = _mm512_set1_ph(`3.0`);
20551	let r = _mm512_maskz_fmsub_ph(`0b01010101010101010101010101010101`, a, b, c);
20552	let e = _mm512_set_ph(
20553	`0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`,
20554	`0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`,
20555	);
20556	assert_eq_m512h(r, e);
20557	}
20558
20559	#[simd_test(enable = "avx512fp16")]
20560	unsafe fn test_mm512_fmsub_round_ph() {
20561	let a = _mm512_set1_ph(`1.0`);
20562	let b = _mm512_set1_ph(`2.0`);
20563	let c = _mm512_set1_ph(`3.0`);
20564	let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20565	let e = _mm512_set1_ph(`-1.0`);
20566	assert_eq_m512h(r, e);
20567	}
20568
20569	#[simd_test(enable = "avx512fp16")]
20570	unsafe fn test_mm512_mask_fmsub_round_ph() {
20571	let a = _mm512_set1_ph(`1.0`);
20572	let b = _mm512_set1_ph(`2.0`);
20573	let c = _mm512_set1_ph(`3.0`);
20574	let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20575	a,
20576	`0b01010101010101010101010101010101`,
20577	b,
20578	c,
20579	);
20580	let e = _mm512_set_ph(
20581	`1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`,
20582	`1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`, `1.0`, `-1.0`,
20583	);
20584	assert_eq_m512h(r, e);
20585	}
20586
20587	#[simd_test(enable = "avx512fp16")]
20588	unsafe fn test_mm512_mask3_fmsub_round_ph() {
20589	let a = _mm512_set1_ph(`1.0`);
20590	let b = _mm512_set1_ph(`2.0`);
20591	let c = _mm512_set1_ph(`3.0`);
20592	let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20593	a,
20594	b,
20595	c,
20596	`0b01010101010101010101010101010101`,
20597	);
20598	let e = _mm512_set_ph(
20599	`3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`,
20600	`3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`, `3.0`, `-1.0`,
20601	);
20602	assert_eq_m512h(r, e);
20603	}
20604
20605	#[simd_test(enable = "avx512fp16")]
20606	unsafe fn test_mm512_maskz_fmsub_round_ph() {
20607	let a = _mm512_set1_ph(`1.0`);
20608	let b = _mm512_set1_ph(`2.0`);
20609	let c = _mm512_set1_ph(`3.0`);
20610	let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20611	`0b01010101010101010101010101010101`,
20612	a,
20613	b,
20614	c,
20615	);
20616	let e = _mm512_set_ph(
20617	`0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`,
20618	`0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`, `0.0`, `-1.0`,
20619	);
20620	assert_eq_m512h(r, e);
20621	}
20622
20623	#[simd_test(enable = "avx512fp16")]
20624	unsafe fn test_mm_fmsub_sh() {
20625	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20626	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20627	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20628	let r = _mm_fmsub_sh(a, b, c);
20629	let e = _mm_setr_ph(`-1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20630	assert_eq_m128h(r, e);
20631	}
20632
20633	#[simd_test(enable = "avx512fp16")]
20634	unsafe fn test_mm_mask_fmsub_sh() {
20635	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20636	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20637	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20638	let r = _mm_mask_fmsub_sh(a, `0`, b, c);
20639	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20640	assert_eq_m128h(r, e);
20641	let r = _mm_mask_fmsub_sh(a, `1`, b, c);
20642	let e = _mm_setr_ph(`-1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20643	assert_eq_m128h(r, e);
20644	}
20645
20646	#[simd_test(enable = "avx512fp16")]
20647	unsafe fn test_mm_mask3_fmsub_sh() {
20648	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20649	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20650	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20651	let r = _mm_mask3_fmsub_sh(a, b, c, `0`);
20652	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20653	assert_eq_m128h(r, e);
20654	let r = _mm_mask3_fmsub_sh(a, b, c, `1`);
20655	let e = _mm_setr_ph(`-1.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20656	assert_eq_m128h(r, e);
20657	}
20658
20659	#[simd_test(enable = "avx512fp16")]
20660	unsafe fn test_mm_maskz_fmsub_sh() {
20661	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20662	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20663	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20664	let r = _mm_maskz_fmsub_sh(`0`, a, b, c);
20665	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20666	assert_eq_m128h(r, e);
20667	let r = _mm_maskz_fmsub_sh(`1`, a, b, c);
20668	let e = _mm_setr_ph(`-1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20669	assert_eq_m128h(r, e);
20670	}
20671
20672	#[simd_test(enable = "avx512fp16")]
20673	unsafe fn test_mm_fmsub_round_sh() {
20674	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20675	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20676	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20677	let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20678	let e = _mm_setr_ph(`-1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20679	assert_eq_m128h(r, e);
20680	}
20681
20682	#[simd_test(enable = "avx512fp16")]
20683	unsafe fn test_mm_mask_fmsub_round_sh() {
20684	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20685	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20686	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20687	let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20688	a, `0`, b, c,
20689	);
20690	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20691	assert_eq_m128h(r, e);
20692	let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20693	a, `1`, b, c,
20694	);
20695	let e = _mm_setr_ph(`-1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20696	assert_eq_m128h(r, e);
20697	}
20698
20699	#[simd_test(enable = "avx512fp16")]
20700	unsafe fn test_mm_mask3_fmsub_round_sh() {
20701	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20702	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20703	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20704	let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20705	a, b, c, `0`,
20706	);
20707	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20708	assert_eq_m128h(r, e);
20709	let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20710	a, b, c, `1`,
20711	);
20712	let e = _mm_setr_ph(`-1.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20713	assert_eq_m128h(r, e);
20714	}
20715
20716	#[simd_test(enable = "avx512fp16")]
20717	unsafe fn test_mm_maskz_fmsub_round_sh() {
20718	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20719	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20720	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20721	let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20722	`0`, a, b, c,
20723	);
20724	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20725	assert_eq_m128h(r, e);
20726	let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20727	`1`, a, b, c,
20728	);
20729	let e = _mm_setr_ph(`-1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20730	assert_eq_m128h(r, e);
20731	}
20732
20733	#[simd_test(enable = "avx512fp16,avx512vl")]
20734	unsafe fn test_mm_fnmadd_ph() {
20735	let a = _mm_set1_ph(`1.0`);
20736	let b = _mm_set1_ph(`2.0`);
20737	let c = _mm_set1_ph(`3.0`);
20738	let r = _mm_fnmadd_ph(a, b, c);
20739	let e = _mm_set1_ph(`1.0`);
20740	assert_eq_m128h(r, e);
20741	}
20742
20743	#[simd_test(enable = "avx512fp16,avx512vl")]
20744	unsafe fn test_mm_mask_fnmadd_ph() {
20745	let a = _mm_set1_ph(`1.0`);
20746	let b = _mm_set1_ph(`2.0`);
20747	let c = _mm_set1_ph(`3.0`);
20748	let r = _mm_mask_fnmadd_ph(a, `0b01010101`, b, c);
20749	let e = _mm_set_ph(`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`);
20750	assert_eq_m128h(r, e);
20751	}
20752
20753	#[simd_test(enable = "avx512fp16,avx512vl")]
20754	unsafe fn test_mm_mask3_fnmadd_ph() {
20755	let a = _mm_set1_ph(`1.0`);
20756	let b = _mm_set1_ph(`2.0`);
20757	let c = _mm_set1_ph(`3.0`);
20758	let r = _mm_mask3_fnmadd_ph(a, b, c, `0b01010101`);
20759	let e = _mm_set_ph(`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`);
20760	assert_eq_m128h(r, e);
20761	}
20762
20763	#[simd_test(enable = "avx512fp16,avx512vl")]
20764	unsafe fn test_mm_maskz_fnmadd_ph() {
20765	let a = _mm_set1_ph(`1.0`);
20766	let b = _mm_set1_ph(`2.0`);
20767	let c = _mm_set1_ph(`3.0`);
20768	let r = _mm_maskz_fnmadd_ph(`0b01010101`, a, b, c);
20769	let e = _mm_set_ph(`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`);
20770	assert_eq_m128h(r, e);
20771	}
20772
20773	#[simd_test(enable = "avx512fp16,avx512vl")]
20774	unsafe fn test_mm256_fnmadd_ph() {
20775	let a = _mm256_set1_ph(`1.0`);
20776	let b = _mm256_set1_ph(`2.0`);
20777	let c = _mm256_set1_ph(`3.0`);
20778	let r = _mm256_fnmadd_ph(a, b, c);
20779	let e = _mm256_set1_ph(`1.0`);
20780	assert_eq_m256h(r, e);
20781	}
20782
20783	#[simd_test(enable = "avx512fp16,avx512vl")]
20784	unsafe fn test_mm256_mask_fnmadd_ph() {
20785	let a = _mm256_set1_ph(`1.0`);
20786	let b = _mm256_set1_ph(`2.0`);
20787	let c = _mm256_set1_ph(`3.0`);
20788	let r = _mm256_mask_fnmadd_ph(a, `0b0101010101010101`, b, c);
20789	let e = _mm256_set_ph(
20790	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
20791	);
20792	assert_eq_m256h(r, e);
20793	}
20794
20795	#[simd_test(enable = "avx512fp16,avx512vl")]
20796	unsafe fn test_mm256_mask3_fnmadd_ph() {
20797	let a = _mm256_set1_ph(`1.0`);
20798	let b = _mm256_set1_ph(`2.0`);
20799	let c = _mm256_set1_ph(`3.0`);
20800	let r = _mm256_mask3_fnmadd_ph(a, b, c, `0b0101010101010101`);
20801	let e = _mm256_set_ph(
20802	`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`,
20803	);
20804	assert_eq_m256h(r, e);
20805	}
20806
20807	#[simd_test(enable = "avx512fp16,avx512vl")]
20808	unsafe fn test_mm256_maskz_fnmadd_ph() {
20809	let a = _mm256_set1_ph(`1.0`);
20810	let b = _mm256_set1_ph(`2.0`);
20811	let c = _mm256_set1_ph(`3.0`);
20812	let r = _mm256_maskz_fnmadd_ph(`0b0101010101010101`, a, b, c);
20813	let e = _mm256_set_ph(
20814	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
20815	);
20816	assert_eq_m256h(r, e);
20817	}
20818
20819	#[simd_test(enable = "avx512fp16")]
20820	unsafe fn test_mm512_fnmadd_ph() {
20821	let a = _mm512_set1_ph(`1.0`);
20822	let b = _mm512_set1_ph(`2.0`);
20823	let c = _mm512_set1_ph(`3.0`);
20824	let r = _mm512_fnmadd_ph(a, b, c);
20825	let e = _mm512_set1_ph(`1.0`);
20826	assert_eq_m512h(r, e);
20827	}
20828
20829	#[simd_test(enable = "avx512fp16")]
20830	unsafe fn test_mm512_mask_fnmadd_ph() {
20831	let a = _mm512_set1_ph(`1.0`);
20832	let b = _mm512_set1_ph(`2.0`);
20833	let c = _mm512_set1_ph(`3.0`);
20834	let r = _mm512_mask_fnmadd_ph(a, `0b01010101010101010101010101010101`, b, c);
20835	let e = _mm512_set_ph(
20836	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
20837	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
20838	);
20839	assert_eq_m512h(r, e);
20840	}
20841
20842	#[simd_test(enable = "avx512fp16")]
20843	unsafe fn test_mm512_mask3_fnmadd_ph() {
20844	let a = _mm512_set1_ph(`1.0`);
20845	let b = _mm512_set1_ph(`2.0`);
20846	let c = _mm512_set1_ph(`3.0`);
20847	let r = _mm512_mask3_fnmadd_ph(a, b, c, `0b01010101010101010101010101010101`);
20848	let e = _mm512_set_ph(
20849	`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`,
20850	`1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`,
20851	);
20852	assert_eq_m512h(r, e);
20853	}
20854
20855	#[simd_test(enable = "avx512fp16")]
20856	unsafe fn test_mm512_maskz_fnmadd_ph() {
20857	let a = _mm512_set1_ph(`1.0`);
20858	let b = _mm512_set1_ph(`2.0`);
20859	let c = _mm512_set1_ph(`3.0`);
20860	let r = _mm512_maskz_fnmadd_ph(`0b01010101010101010101010101010101`, a, b, c);
20861	let e = _mm512_set_ph(
20862	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
20863	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
20864	);
20865	assert_eq_m512h(r, e);
20866	}
20867
20868	#[simd_test(enable = "avx512fp16")]
20869	unsafe fn test_mm512_fnmadd_round_ph() {
20870	let a = _mm512_set1_ph(`1.0`);
20871	let b = _mm512_set1_ph(`2.0`);
20872	let c = _mm512_set1_ph(`3.0`);
20873	let r =
20874	_mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20875	let e = _mm512_set1_ph(`1.0`);
20876	assert_eq_m512h(r, e);
20877	}
20878
20879	#[simd_test(enable = "avx512fp16")]
20880	unsafe fn test_mm512_mask_fnmadd_round_ph() {
20881	let a = _mm512_set1_ph(`1.0`);
20882	let b = _mm512_set1_ph(`2.0`);
20883	let c = _mm512_set1_ph(`3.0`);
20884	let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20885	a,
20886	`0b01010101010101010101010101010101`,
20887	b,
20888	c,
20889	);
20890	let e = _mm512_set_ph(
20891	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
20892	`1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`, `1.0`,
20893	);
20894	assert_eq_m512h(r, e);
20895	}
20896
20897	#[simd_test(enable = "avx512fp16")]
20898	unsafe fn test_mm512_mask3_fnmadd_round_ph() {
20899	let a = _mm512_set1_ph(`1.0`);
20900	let b = _mm512_set1_ph(`2.0`);
20901	let c = _mm512_set1_ph(`3.0`);
20902	let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20903	a,
20904	b,
20905	c,
20906	`0b01010101010101010101010101010101`,
20907	);
20908	let e = _mm512_set_ph(
20909	`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`,
20910	`1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`,
20911	);
20912	assert_eq_m512h(r, e);
20913	}
20914
20915	#[simd_test(enable = "avx512fp16")]
20916	unsafe fn test_mm512_maskz_fnmadd_round_ph() {
20917	let a = _mm512_set1_ph(`1.0`);
20918	let b = _mm512_set1_ph(`2.0`);
20919	let c = _mm512_set1_ph(`3.0`);
20920	let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20921	`0b01010101010101010101010101010101`,
20922	a,
20923	b,
20924	c,
20925	);
20926	let e = _mm512_set_ph(
20927	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
20928	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
20929	);
20930	assert_eq_m512h(r, e);
20931	}
20932
20933	#[simd_test(enable = "avx512fp16")]
20934	unsafe fn test_mm_fnmadd_sh() {
20935	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20936	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20937	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20938	let r = _mm_fnmadd_sh(a, b, c);
20939	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20940	assert_eq_m128h(r, e);
20941	}
20942
20943	#[simd_test(enable = "avx512fp16")]
20944	unsafe fn test_mm_mask_fnmadd_sh() {
20945	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20946	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20947	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20948	let r = _mm_mask_fnmadd_sh(a, `0`, b, c);
20949	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20950	assert_eq_m128h(r, e);
20951	let r = _mm_mask_fnmadd_sh(a, `1`, b, c);
20952	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20953	assert_eq_m128h(r, e);
20954	}
20955
20956	#[simd_test(enable = "avx512fp16")]
20957	unsafe fn test_mm_mask3_fnmadd_sh() {
20958	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20959	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20960	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20961	let r = _mm_mask3_fnmadd_sh(a, b, c, `0`);
20962	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20963	assert_eq_m128h(r, e);
20964	let r = _mm_mask3_fnmadd_sh(a, b, c, `1`);
20965	let e = _mm_setr_ph(`1.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20966	assert_eq_m128h(r, e);
20967	}
20968
20969	#[simd_test(enable = "avx512fp16")]
20970	unsafe fn test_mm_maskz_fnmadd_sh() {
20971	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20972	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20973	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20974	let r = _mm_maskz_fnmadd_sh(`0`, a, b, c);
20975	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20976	assert_eq_m128h(r, e);
20977	let r = _mm_maskz_fnmadd_sh(`1`, a, b, c);
20978	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20979	assert_eq_m128h(r, e);
20980	}
20981
20982	#[simd_test(enable = "avx512fp16")]
20983	unsafe fn test_mm_fnmadd_round_sh() {
20984	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20985	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20986	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20987	let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
20988	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20989	assert_eq_m128h(r, e);
20990	}
20991
20992	#[simd_test(enable = "avx512fp16")]
20993	unsafe fn test_mm_mask_fnmadd_round_sh() {
20994	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
20995	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
20996	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
20997	let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
20998	a, `0`, b, c,
20999	);
21000	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21001	assert_eq_m128h(r, e);
21002	let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21003	a, `1`, b, c,
21004	);
21005	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21006	assert_eq_m128h(r, e);
21007	}
21008
21009	#[simd_test(enable = "avx512fp16")]
21010	unsafe fn test_mm_mask3_fnmadd_round_sh() {
21011	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21012	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21013	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21014	let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21015	a, b, c, `0`,
21016	);
21017	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21018	assert_eq_m128h(r, e);
21019	let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21020	a, b, c, `1`,
21021	);
21022	let e = _mm_setr_ph(`1.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21023	assert_eq_m128h(r, e);
21024	}
21025
21026	#[simd_test(enable = "avx512fp16")]
21027	unsafe fn test_mm_maskz_fnmadd_round_sh() {
21028	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21029	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21030	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21031	let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21032	`0`, a, b, c,
21033	);
21034	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21035	assert_eq_m128h(r, e);
21036	let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21037	`1`, a, b, c,
21038	);
21039	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21040	assert_eq_m128h(r, e);
21041	}
21042
21043	#[simd_test(enable = "avx512fp16,avx512vl")]
21044	unsafe fn test_mm_fnmsub_ph() {
21045	let a = _mm_set1_ph(`1.0`);
21046	let b = _mm_set1_ph(`2.0`);
21047	let c = _mm_set1_ph(`3.0`);
21048	let r = _mm_fnmsub_ph(a, b, c);
21049	let e = _mm_set1_ph(`-5.0`);
21050	assert_eq_m128h(r, e);
21051	}
21052
21053	#[simd_test(enable = "avx512fp16,avx512vl")]
21054	unsafe fn test_mm_mask_fnmsub_ph() {
21055	let a = _mm_set1_ph(`1.0`);
21056	let b = _mm_set1_ph(`2.0`);
21057	let c = _mm_set1_ph(`3.0`);
21058	let r = _mm_mask_fnmsub_ph(a, `0b01010101`, b, c);
21059	let e = _mm_set_ph(`1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`);
21060	assert_eq_m128h(r, e);
21061	}
21062
21063	#[simd_test(enable = "avx512fp16,avx512vl")]
21064	unsafe fn test_mm_mask3_fnmsub_ph() {
21065	let a = _mm_set1_ph(`1.0`);
21066	let b = _mm_set1_ph(`2.0`);
21067	let c = _mm_set1_ph(`3.0`);
21068	let r = _mm_mask3_fnmsub_ph(a, b, c, `0b01010101`);
21069	let e = _mm_set_ph(`3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`);
21070	assert_eq_m128h(r, e);
21071	}
21072
21073	#[simd_test(enable = "avx512fp16,avx512vl")]
21074	unsafe fn test_mm_maskz_fnmsub_ph() {
21075	let a = _mm_set1_ph(`1.0`);
21076	let b = _mm_set1_ph(`2.0`);
21077	let c = _mm_set1_ph(`3.0`);
21078	let r = _mm_maskz_fnmsub_ph(`0b01010101`, a, b, c);
21079	let e = _mm_set_ph(`0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`);
21080	assert_eq_m128h(r, e);
21081	}
21082
21083	#[simd_test(enable = "avx512fp16,avx512vl")]
21084	unsafe fn test_mm256_fnmsub_ph() {
21085	let a = _mm256_set1_ph(`1.0`);
21086	let b = _mm256_set1_ph(`2.0`);
21087	let c = _mm256_set1_ph(`3.0`);
21088	let r = _mm256_fnmsub_ph(a, b, c);
21089	let e = _mm256_set1_ph(`-5.0`);
21090	assert_eq_m256h(r, e);
21091	}
21092
21093	#[simd_test(enable = "avx512fp16,avx512vl")]
21094	unsafe fn test_mm256_mask_fnmsub_ph() {
21095	let a = _mm256_set1_ph(`1.0`);
21096	let b = _mm256_set1_ph(`2.0`);
21097	let c = _mm256_set1_ph(`3.0`);
21098	let r = _mm256_mask_fnmsub_ph(a, `0b0101010101010101`, b, c);
21099	let e = _mm256_set_ph(
21100	`1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`,
21101	);
21102	assert_eq_m256h(r, e);
21103	}
21104
21105	#[simd_test(enable = "avx512fp16,avx512vl")]
21106	unsafe fn test_mm256_mask3_fnmsub_ph() {
21107	let a = _mm256_set1_ph(`1.0`);
21108	let b = _mm256_set1_ph(`2.0`);
21109	let c = _mm256_set1_ph(`3.0`);
21110	let r = _mm256_mask3_fnmsub_ph(a, b, c, `0b0101010101010101`);
21111	let e = _mm256_set_ph(
21112	`3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`,
21113	);
21114	assert_eq_m256h(r, e);
21115	}
21116
21117	#[simd_test(enable = "avx512fp16,avx512vl")]
21118	unsafe fn test_mm256_maskz_fnmsub_ph() {
21119	let a = _mm256_set1_ph(`1.0`);
21120	let b = _mm256_set1_ph(`2.0`);
21121	let c = _mm256_set1_ph(`3.0`);
21122	let r = _mm256_maskz_fnmsub_ph(`0b0101010101010101`, a, b, c);
21123	let e = _mm256_set_ph(
21124	`0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`,
21125	);
21126	assert_eq_m256h(r, e);
21127	}
21128
21129	#[simd_test(enable = "avx512fp16")]
21130	unsafe fn test_mm512_fnmsub_ph() {
21131	let a = _mm512_set1_ph(`1.0`);
21132	let b = _mm512_set1_ph(`2.0`);
21133	let c = _mm512_set1_ph(`3.0`);
21134	let r = _mm512_fnmsub_ph(a, b, c);
21135	let e = _mm512_set1_ph(`-5.0`);
21136	assert_eq_m512h(r, e);
21137	}
21138
21139	#[simd_test(enable = "avx512fp16")]
21140	unsafe fn test_mm512_mask_fnmsub_ph() {
21141	let a = _mm512_set1_ph(`1.0`);
21142	let b = _mm512_set1_ph(`2.0`);
21143	let c = _mm512_set1_ph(`3.0`);
21144	let r = _mm512_mask_fnmsub_ph(a, `0b01010101010101010101010101010101`, b, c);
21145	let e = _mm512_set_ph(
21146	`1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`,
21147	`1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`,
21148	);
21149	assert_eq_m512h(r, e);
21150	}
21151
21152	#[simd_test(enable = "avx512fp16")]
21153	unsafe fn test_mm512_mask3_fnmsub_ph() {
21154	let a = _mm512_set1_ph(`1.0`);
21155	let b = _mm512_set1_ph(`2.0`);
21156	let c = _mm512_set1_ph(`3.0`);
21157	let r = _mm512_mask3_fnmsub_ph(a, b, c, `0b01010101010101010101010101010101`);
21158	let e = _mm512_set_ph(
21159	`3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`,
21160	`3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`,
21161	);
21162	assert_eq_m512h(r, e);
21163	}
21164
21165	#[simd_test(enable = "avx512fp16")]
21166	unsafe fn test_mm512_maskz_fnmsub_ph() {
21167	let a = _mm512_set1_ph(`1.0`);
21168	let b = _mm512_set1_ph(`2.0`);
21169	let c = _mm512_set1_ph(`3.0`);
21170	let r = _mm512_maskz_fnmsub_ph(`0b01010101010101010101010101010101`, a, b, c);
21171	let e = _mm512_set_ph(
21172	`0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`,
21173	`0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`,
21174	);
21175	assert_eq_m512h(r, e);
21176	}
21177
21178	#[simd_test(enable = "avx512fp16")]
21179	unsafe fn test_mm512_fnmsub_round_ph() {
21180	let a = _mm512_set1_ph(`1.0`);
21181	let b = _mm512_set1_ph(`2.0`);
21182	let c = _mm512_set1_ph(`3.0`);
21183	let r =
21184	_mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
21185	let e = _mm512_set1_ph(`-5.0`);
21186	assert_eq_m512h(r, e);
21187	}
21188
21189	#[simd_test(enable = "avx512fp16")]
21190	unsafe fn test_mm512_mask_fnmsub_round_ph() {
21191	let a = _mm512_set1_ph(`1.0`);
21192	let b = _mm512_set1_ph(`2.0`);
21193	let c = _mm512_set1_ph(`3.0`);
21194	let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21195	a,
21196	`0b01010101010101010101010101010101`,
21197	b,
21198	c,
21199	);
21200	let e = _mm512_set_ph(
21201	`1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`,
21202	`1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`, `1.0`, `-5.0`,
21203	);
21204	assert_eq_m512h(r, e);
21205	}
21206
21207	#[simd_test(enable = "avx512fp16")]
21208	unsafe fn test_mm512_mask3_fnmsub_round_ph() {
21209	let a = _mm512_set1_ph(`1.0`);
21210	let b = _mm512_set1_ph(`2.0`);
21211	let c = _mm512_set1_ph(`3.0`);
21212	let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21213	a,
21214	b,
21215	c,
21216	`0b01010101010101010101010101010101`,
21217	);
21218	let e = _mm512_set_ph(
21219	`3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`,
21220	`3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`, `3.0`, `-5.0`,
21221	);
21222	assert_eq_m512h(r, e);
21223	}
21224
21225	#[simd_test(enable = "avx512fp16")]
21226	unsafe fn test_mm512_maskz_fnmsub_round_ph() {
21227	let a = _mm512_set1_ph(`1.0`);
21228	let b = _mm512_set1_ph(`2.0`);
21229	let c = _mm512_set1_ph(`3.0`);
21230	let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21231	`0b01010101010101010101010101010101`,
21232	a,
21233	b,
21234	c,
21235	);
21236	let e = _mm512_set_ph(
21237	`0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`,
21238	`0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`, `0.0`, `-5.0`,
21239	);
21240	assert_eq_m512h(r, e);
21241	}
21242
21243	#[simd_test(enable = "avx512fp16")]
21244	unsafe fn test_mm_fnmsub_sh() {
21245	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21246	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21247	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21248	let r = _mm_fnmsub_sh(a, b, c);
21249	let e = _mm_setr_ph(`-5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21250	assert_eq_m128h(r, e);
21251	}
21252
21253	#[simd_test(enable = "avx512fp16")]
21254	unsafe fn test_mm_mask_fnmsub_sh() {
21255	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21256	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21257	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21258	let r = _mm_mask_fnmsub_sh(a, `0`, b, c);
21259	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21260	assert_eq_m128h(r, e);
21261	let r = _mm_mask_fnmsub_sh(a, `1`, b, c);
21262	let e = _mm_setr_ph(`-5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21263	assert_eq_m128h(r, e);
21264	}
21265
21266	#[simd_test(enable = "avx512fp16")]
21267	unsafe fn test_mm_mask3_fnmsub_sh() {
21268	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21269	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21270	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21271	let r = _mm_mask3_fnmsub_sh(a, b, c, `0`);
21272	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21273	assert_eq_m128h(r, e);
21274	let r = _mm_mask3_fnmsub_sh(a, b, c, `1`);
21275	let e = _mm_setr_ph(`-5.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21276	assert_eq_m128h(r, e);
21277	}
21278
21279	#[simd_test(enable = "avx512fp16")]
21280	unsafe fn test_mm_maskz_fnmsub_sh() {
21281	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21282	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21283	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21284	let r = _mm_maskz_fnmsub_sh(`0`, a, b, c);
21285	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21286	assert_eq_m128h(r, e);
21287	let r = _mm_maskz_fnmsub_sh(`1`, a, b, c);
21288	let e = _mm_setr_ph(`-5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21289	assert_eq_m128h(r, e);
21290	}
21291
21292	#[simd_test(enable = "avx512fp16")]
21293	unsafe fn test_mm_fnmsub_round_sh() {
21294	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21295	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21296	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21297	let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
21298	let e = _mm_setr_ph(`-5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21299	assert_eq_m128h(r, e);
21300	}
21301
21302	#[simd_test(enable = "avx512fp16")]
21303	unsafe fn test_mm_mask_fnmsub_round_sh() {
21304	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21305	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21306	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21307	let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21308	a, `0`, b, c,
21309	);
21310	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21311	assert_eq_m128h(r, e);
21312	let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21313	a, `1`, b, c,
21314	);
21315	let e = _mm_setr_ph(`-5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21316	assert_eq_m128h(r, e);
21317	}
21318
21319	#[simd_test(enable = "avx512fp16")]
21320	unsafe fn test_mm_mask3_fnmsub_round_sh() {
21321	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21322	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21323	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21324	let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21325	a, b, c, `0`,
21326	);
21327	let e = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21328	assert_eq_m128h(r, e);
21329	let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21330	a, b, c, `1`,
21331	);
21332	let e = _mm_setr_ph(`-5.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21333	assert_eq_m128h(r, e);
21334	}
21335
21336	#[simd_test(enable = "avx512fp16")]
21337	unsafe fn test_mm_maskz_fnmsub_round_sh() {
21338	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21339	let b = _mm_setr_ph(`2.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
21340	let c = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
21341	let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21342	`0`, a, b, c,
21343	);
21344	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21345	assert_eq_m128h(r, e);
21346	let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21347	`1`, a, b, c,
21348	);
21349	let e = _mm_setr_ph(`-5.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
21350	assert_eq_m128h(r, e);
21351	}
21352
21353	#[simd_test(enable = "avx512fp16,avx512vl")]
21354	unsafe fn test_mm_fmaddsub_ph() {
21355	let a = _mm_set1_ph(`1.0`);
21356	let b = _mm_set1_ph(`2.0`);
21357	let c = _mm_set1_ph(`3.0`);
21358	let r = _mm_fmaddsub_ph(a, b, c);
21359	let e = _mm_set_ph(`5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`);
21360	assert_eq_m128h(r, e);
21361	}
21362
21363	#[simd_test(enable = "avx512fp16,avx512vl")]
21364	unsafe fn test_mm_mask_fmaddsub_ph() {
21365	let a = _mm_set1_ph(`1.0`);
21366	let b = _mm_set1_ph(`2.0`);
21367	let c = _mm_set1_ph(`3.0`);
21368	let r = _mm_mask_fmaddsub_ph(a, `0b00110011`, b, c);
21369	let e = _mm_set_ph(`1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`);
21370	assert_eq_m128h(r, e);
21371	}
21372
21373	#[simd_test(enable = "avx512fp16,avx512vl")]
21374	unsafe fn test_mm_mask3_fmaddsub_ph() {
21375	let a = _mm_set1_ph(`1.0`);
21376	let b = _mm_set1_ph(`2.0`);
21377	let c = _mm_set1_ph(`3.0`);
21378	let r = _mm_mask3_fmaddsub_ph(a, b, c, `0b00110011`);
21379	let e = _mm_set_ph(`3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`);
21380	assert_eq_m128h(r, e);
21381	}
21382
21383	#[simd_test(enable = "avx512fp16,avx512vl")]
21384	unsafe fn test_mm_maskz_fmaddsub_ph() {
21385	let a = _mm_set1_ph(`1.0`);
21386	let b = _mm_set1_ph(`2.0`);
21387	let c = _mm_set1_ph(`3.0`);
21388	let r = _mm_maskz_fmaddsub_ph(`0b00110011`, a, b, c);
21389	let e = _mm_set_ph(`0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`);
21390	assert_eq_m128h(r, e);
21391	}
21392
21393	#[simd_test(enable = "avx512fp16,avx512vl")]
21394	unsafe fn test_mm256_fmaddsub_ph() {
21395	let a = _mm256_set1_ph(`1.0`);
21396	let b = _mm256_set1_ph(`2.0`);
21397	let c = _mm256_set1_ph(`3.0`);
21398	let r = _mm256_fmaddsub_ph(a, b, c);
21399	let e = _mm256_set_ph(
21400	`5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`,
21401	);
21402	assert_eq_m256h(r, e);
21403	}
21404
21405	#[simd_test(enable = "avx512fp16,avx512vl")]
21406	unsafe fn test_mm256_mask_fmaddsub_ph() {
21407	let a = _mm256_set1_ph(`1.0`);
21408	let b = _mm256_set1_ph(`2.0`);
21409	let c = _mm256_set1_ph(`3.0`);
21410	let r = _mm256_mask_fmaddsub_ph(a, `0b0011001100110011`, b, c);
21411	let e = _mm256_set_ph(
21412	`1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`,
21413	);
21414	assert_eq_m256h(r, e);
21415	}
21416
21417	#[simd_test(enable = "avx512fp16,avx512vl")]
21418	unsafe fn test_mm256_mask3_fmaddsub_ph() {
21419	let a = _mm256_set1_ph(`1.0`);
21420	let b = _mm256_set1_ph(`2.0`);
21421	let c = _mm256_set1_ph(`3.0`);
21422	let r = _mm256_mask3_fmaddsub_ph(a, b, c, `0b0011001100110011`);
21423	let e = _mm256_set_ph(
21424	`3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`,
21425	);
21426	assert_eq_m256h(r, e);
21427	}
21428
21429	#[simd_test(enable = "avx512fp16,avx512vl")]
21430	unsafe fn test_mm256_maskz_fmaddsub_ph() {
21431	let a = _mm256_set1_ph(`1.0`);
21432	let b = _mm256_set1_ph(`2.0`);
21433	let c = _mm256_set1_ph(`3.0`);
21434	let r = _mm256_maskz_fmaddsub_ph(`0b0011001100110011`, a, b, c);
21435	let e = _mm256_set_ph(
21436	`0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`,
21437	);
21438	assert_eq_m256h(r, e);
21439	}
21440
21441	#[simd_test(enable = "avx512fp16")]
21442	unsafe fn test_mm512_fmaddsub_ph() {
21443	let a = _mm512_set1_ph(`1.0`);
21444	let b = _mm512_set1_ph(`2.0`);
21445	let c = _mm512_set1_ph(`3.0`);
21446	let r = _mm512_fmaddsub_ph(a, b, c);
21447	let e = _mm512_set_ph(
21448	`5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`,
21449	`5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`,
21450	);
21451	assert_eq_m512h(r, e);
21452	}
21453
21454	#[simd_test(enable = "avx512fp16")]
21455	unsafe fn test_mm512_mask_fmaddsub_ph() {
21456	let a = _mm512_set1_ph(`1.0`);
21457	let b = _mm512_set1_ph(`2.0`);
21458	let c = _mm512_set1_ph(`3.0`);
21459	let r = _mm512_mask_fmaddsub_ph(a, `0b00110011001100110011001100110011`, b, c);
21460	let e = _mm512_set_ph(
21461	`1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`,
21462	`1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`,
21463	);
21464	assert_eq_m512h(r, e);
21465	}
21466
21467	#[simd_test(enable = "avx512fp16")]
21468	unsafe fn test_mm512_mask3_fmaddsub_ph() {
21469	let a = _mm512_set1_ph(`1.0`);
21470	let b = _mm512_set1_ph(`2.0`);
21471	let c = _mm512_set1_ph(`3.0`);
21472	let r = _mm512_mask3_fmaddsub_ph(a, b, c, `0b00110011001100110011001100110011`);
21473	let e = _mm512_set_ph(
21474	`3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`,
21475	`3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`,
21476	);
21477	assert_eq_m512h(r, e);
21478	}
21479
21480	#[simd_test(enable = "avx512fp16")]
21481	unsafe fn test_mm512_maskz_fmaddsub_ph() {
21482	let a = _mm512_set1_ph(`1.0`);
21483	let b = _mm512_set1_ph(`2.0`);
21484	let c = _mm512_set1_ph(`3.0`);
21485	let r = _mm512_maskz_fmaddsub_ph(`0b00110011001100110011001100110011`, a, b, c);
21486	let e = _mm512_set_ph(
21487	`0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`,
21488	`0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`,
21489	);
21490	assert_eq_m512h(r, e);
21491	}
21492
21493	#[simd_test(enable = "avx512fp16")]
21494	unsafe fn test_mm512_fmaddsub_round_ph() {
21495	let a = _mm512_set1_ph(`1.0`);
21496	let b = _mm512_set1_ph(`2.0`);
21497	let c = _mm512_set1_ph(`3.0`);
21498	let r =
21499	_mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
21500	let e = _mm512_set_ph(
21501	`5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`,
21502	`5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`,
21503	);
21504	assert_eq_m512h(r, e);
21505	}
21506
21507	#[simd_test(enable = "avx512fp16")]
21508	unsafe fn test_mm512_mask_fmaddsub_round_ph() {
21509	let a = _mm512_set1_ph(`1.0`);
21510	let b = _mm512_set1_ph(`2.0`);
21511	let c = _mm512_set1_ph(`3.0`);
21512	let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21513	a,
21514	`0b00110011001100110011001100110011`,
21515	b,
21516	c,
21517	);
21518	let e = _mm512_set_ph(
21519	`1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`,
21520	`1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`, `1.0`, `1.0`, `5.0`, `-1.0`,
21521	);
21522	assert_eq_m512h(r, e);
21523	}
21524
21525	#[simd_test(enable = "avx512fp16")]
21526	unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
21527	let a = _mm512_set1_ph(`1.0`);
21528	let b = _mm512_set1_ph(`2.0`);
21529	let c = _mm512_set1_ph(`3.0`);
21530	let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21531	a,
21532	b,
21533	c,
21534	`0b00110011001100110011001100110011`,
21535	);
21536	let e = _mm512_set_ph(
21537	`3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`,
21538	`3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`, `3.0`, `3.0`, `5.0`, `-1.0`,
21539	);
21540	assert_eq_m512h(r, e);
21541	}
21542
21543	#[simd_test(enable = "avx512fp16")]
21544	unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
21545	let a = _mm512_set1_ph(`1.0`);
21546	let b = _mm512_set1_ph(`2.0`);
21547	let c = _mm512_set1_ph(`3.0`);
21548	let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21549	`0b00110011001100110011001100110011`,
21550	a,
21551	b,
21552	c,
21553	);
21554	let e = _mm512_set_ph(
21555	`0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`,
21556	`0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`, `0.0`, `0.0`, `5.0`, `-1.0`,
21557	);
21558	assert_eq_m512h(r, e);
21559	}
21560
21561	#[simd_test(enable = "avx512fp16,avx512vl")]
21562	unsafe fn test_mm_fmsubadd_ph() {
21563	let a = _mm_set1_ph(`1.0`);
21564	let b = _mm_set1_ph(`2.0`);
21565	let c = _mm_set1_ph(`3.0`);
21566	let r = _mm_fmsubadd_ph(a, b, c);
21567	let e = _mm_set_ph(`-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`);
21568	assert_eq_m128h(r, e);
21569	}
21570
21571	#[simd_test(enable = "avx512fp16,avx512vl")]
21572	unsafe fn test_mm_mask_fmsubadd_ph() {
21573	let a = _mm_set1_ph(`1.0`);
21574	let b = _mm_set1_ph(`2.0`);
21575	let c = _mm_set1_ph(`3.0`);
21576	let r = _mm_mask_fmsubadd_ph(a, `0b00110011`, b, c);
21577	let e = _mm_set_ph(`1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`);
21578	assert_eq_m128h(r, e);
21579	}
21580
21581	#[simd_test(enable = "avx512fp16,avx512vl")]
21582	unsafe fn test_mm_mask3_fmsubadd_ph() {
21583	let a = _mm_set1_ph(`1.0`);
21584	let b = _mm_set1_ph(`2.0`);
21585	let c = _mm_set1_ph(`3.0`);
21586	let r = _mm_mask3_fmsubadd_ph(a, b, c, `0b00110011`);
21587	let e = _mm_set_ph(`3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`);
21588	assert_eq_m128h(r, e);
21589	}
21590
21591	#[simd_test(enable = "avx512fp16,avx512vl")]
21592	unsafe fn test_mm_maskz_fmsubadd_ph() {
21593	let a = _mm_set1_ph(`1.0`);
21594	let b = _mm_set1_ph(`2.0`);
21595	let c = _mm_set1_ph(`3.0`);
21596	let r = _mm_maskz_fmsubadd_ph(`0b00110011`, a, b, c);
21597	let e = _mm_set_ph(`0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`);
21598	assert_eq_m128h(r, e);
21599	}
21600
21601	#[simd_test(enable = "avx512fp16,avx512vl")]
21602	unsafe fn test_mm256_fmsubadd_ph() {
21603	let a = _mm256_set1_ph(`1.0`);
21604	let b = _mm256_set1_ph(`2.0`);
21605	let c = _mm256_set1_ph(`3.0`);
21606	let r = _mm256_fmsubadd_ph(a, b, c);
21607	let e = _mm256_set_ph(
21608	`-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`,
21609	);
21610	assert_eq_m256h(r, e);
21611	}
21612
21613	#[simd_test(enable = "avx512fp16,avx512vl")]
21614	unsafe fn test_mm256_mask_fmsubadd_ph() {
21615	let a = _mm256_set1_ph(`1.0`);
21616	let b = _mm256_set1_ph(`2.0`);
21617	let c = _mm256_set1_ph(`3.0`);
21618	let r = _mm256_mask_fmsubadd_ph(a, `0b0011001100110011`, b, c);
21619	let e = _mm256_set_ph(
21620	`1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`,
21621	);
21622	assert_eq_m256h(r, e);
21623	}
21624
21625	#[simd_test(enable = "avx512fp16,avx512vl")]
21626	unsafe fn test_mm256_mask3_fmsubadd_ph() {
21627	let a = _mm256_set1_ph(`1.0`);
21628	let b = _mm256_set1_ph(`2.0`);
21629	let c = _mm256_set1_ph(`3.0`);
21630	let r = _mm256_mask3_fmsubadd_ph(a, b, c, `0b0011001100110011`);
21631	let e = _mm256_set_ph(
21632	`3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`,
21633	);
21634	assert_eq_m256h(r, e);
21635	}
21636
21637	#[simd_test(enable = "avx512fp16,avx512vl")]
21638	unsafe fn test_mm256_maskz_fmsubadd_ph() {
21639	let a = _mm256_set1_ph(`1.0`);
21640	let b = _mm256_set1_ph(`2.0`);
21641	let c = _mm256_set1_ph(`3.0`);
21642	let r = _mm256_maskz_fmsubadd_ph(`0b0011001100110011`, a, b, c);
21643	let e = _mm256_set_ph(
21644	`0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`,
21645	);
21646	assert_eq_m256h(r, e);
21647	}
21648
21649	#[simd_test(enable = "avx512fp16")]
21650	unsafe fn test_mm512_fmsubadd_ph() {
21651	let a = _mm512_set1_ph(`1.0`);
21652	let b = _mm512_set1_ph(`2.0`);
21653	let c = _mm512_set1_ph(`3.0`);
21654	let r = _mm512_fmsubadd_ph(a, b, c);
21655	let e = _mm512_set_ph(
21656	`-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`,
21657	`-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`,
21658	);
21659	assert_eq_m512h(r, e);
21660	}
21661
21662	#[simd_test(enable = "avx512fp16")]
21663	unsafe fn test_mm512_mask_fmsubadd_ph() {
21664	let a = _mm512_set1_ph(`1.0`);
21665	let b = _mm512_set1_ph(`2.0`);
21666	let c = _mm512_set1_ph(`3.0`);
21667	let r = _mm512_mask_fmsubadd_ph(a, `0b00110011001100110011001100110011`, b, c);
21668	let e = _mm512_set_ph(
21669	`1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`,
21670	`1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`,
21671	);
21672	assert_eq_m512h(r, e);
21673	}
21674
21675	#[simd_test(enable = "avx512fp16")]
21676	unsafe fn test_mm512_mask3_fmsubadd_ph() {
21677	let a = _mm512_set1_ph(`1.0`);
21678	let b = _mm512_set1_ph(`2.0`);
21679	let c = _mm512_set1_ph(`3.0`);
21680	let r = _mm512_mask3_fmsubadd_ph(a, b, c, `0b00110011001100110011001100110011`);
21681	let e = _mm512_set_ph(
21682	`3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`,
21683	`3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`,
21684	);
21685	assert_eq_m512h(r, e);
21686	}
21687
21688	#[simd_test(enable = "avx512fp16")]
21689	unsafe fn test_mm512_maskz_fmsubadd_ph() {
21690	let a = _mm512_set1_ph(`1.0`);
21691	let b = _mm512_set1_ph(`2.0`);
21692	let c = _mm512_set1_ph(`3.0`);
21693	let r = _mm512_maskz_fmsubadd_ph(`0b00110011001100110011001100110011`, a, b, c);
21694	let e = _mm512_set_ph(
21695	`0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`,
21696	`0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`,
21697	);
21698	assert_eq_m512h(r, e);
21699	}
21700
21701	#[simd_test(enable = "avx512fp16")]
21702	unsafe fn test_mm512_fmsubadd_round_ph() {
21703	let a = _mm512_set1_ph(`1.0`);
21704	let b = _mm512_set1_ph(`2.0`);
21705	let c = _mm512_set1_ph(`3.0`);
21706	let r =
21707	_mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b, c);
21708	let e = _mm512_set_ph(
21709	`-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`,
21710	`-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`, `-1.0`, `5.0`,
21711	);
21712	assert_eq_m512h(r, e);
21713	}
21714
21715	#[simd_test(enable = "avx512fp16")]
21716	unsafe fn test_mm512_mask_fmsubadd_round_ph() {
21717	let a = _mm512_set1_ph(`1.0`);
21718	let b = _mm512_set1_ph(`2.0`);
21719	let c = _mm512_set1_ph(`3.0`);
21720	let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21721	a,
21722	`0b00110011001100110011001100110011`,
21723	b,
21724	c,
21725	);
21726	let e = _mm512_set_ph(
21727	`1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`,
21728	`1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`, `1.0`, `1.0`, `-1.0`, `5.0`,
21729	);
21730	assert_eq_m512h(r, e);
21731	}
21732
21733	#[simd_test(enable = "avx512fp16")]
21734	unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
21735	let a = _mm512_set1_ph(`1.0`);
21736	let b = _mm512_set1_ph(`2.0`);
21737	let c = _mm512_set1_ph(`3.0`);
21738	let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21739	a,
21740	b,
21741	c,
21742	`0b00110011001100110011001100110011`,
21743	);
21744	let e = _mm512_set_ph(
21745	`3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`,
21746	`3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`, `3.0`, `3.0`, `-1.0`, `5.0`,
21747	);
21748	assert_eq_m512h(r, e);
21749	}
21750
21751	#[simd_test(enable = "avx512fp16")]
21752	unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
21753	let a = _mm512_set1_ph(`1.0`);
21754	let b = _mm512_set1_ph(`2.0`);
21755	let c = _mm512_set1_ph(`3.0`);
21756	let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
21757	`0b00110011001100110011001100110011`,
21758	a,
21759	b,
21760	c,
21761	);
21762	let e = _mm512_set_ph(
21763	`0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`,
21764	`0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`, `0.0`, `0.0`, `-1.0`, `5.0`,
21765	);
21766	assert_eq_m512h(r, e);
21767	}
21768
21769	#[simd_test(enable = "avx512fp16,avx512vl")]
21770	unsafe fn test_mm_rcp_ph() {
21771	let a = _mm_set1_ph(`2.0`);
21772	let r = _mm_rcp_ph(a);
21773	let e = _mm_set1_ph(`0.5`);
21774	assert_eq_m128h(r, e);
21775	}
21776
21777	#[simd_test(enable = "avx512fp16,avx512vl")]
21778	unsafe fn test_mm_mask_rcp_ph() {
21779	let a = _mm_set1_ph(`2.0`);
21780	let src = _mm_set1_ph(`1.0`);
21781	let r = _mm_mask_rcp_ph(src, `0b01010101`, a);
21782	let e = _mm_set_ph(`1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`);
21783	assert_eq_m128h(r, e);
21784	}
21785
21786	#[simd_test(enable = "avx512fp16,avx512vl")]
21787	unsafe fn test_mm_maskz_rcp_ph() {
21788	let a = _mm_set1_ph(`2.0`);
21789	let r = _mm_maskz_rcp_ph(`0b01010101`, a);
21790	let e = _mm_set_ph(`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`);
21791	assert_eq_m128h(r, e);
21792	}
21793
21794	#[simd_test(enable = "avx512fp16,avx512vl")]
21795	unsafe fn test_mm256_rcp_ph() {
21796	let a = _mm256_set1_ph(`2.0`);
21797	let r = _mm256_rcp_ph(a);
21798	let e = _mm256_set1_ph(`0.5`);
21799	assert_eq_m256h(r, e);
21800	}
21801
21802	#[simd_test(enable = "avx512fp16,avx512vl")]
21803	unsafe fn test_mm256_mask_rcp_ph() {
21804	let a = _mm256_set1_ph(`2.0`);
21805	let src = _mm256_set1_ph(`1.0`);
21806	let r = _mm256_mask_rcp_ph(src, `0b0101010101010101`, a);
21807	let e = _mm256_set_ph(
21808	`1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`,
21809	);
21810	assert_eq_m256h(r, e);
21811	}
21812
21813	#[simd_test(enable = "avx512fp16,avx512vl")]
21814	unsafe fn test_mm256_maskz_rcp_ph() {
21815	let a = _mm256_set1_ph(`2.0`);
21816	let r = _mm256_maskz_rcp_ph(`0b0101010101010101`, a);
21817	let e = _mm256_set_ph(
21818	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
21819	);
21820	assert_eq_m256h(r, e);
21821	}
21822
21823	#[simd_test(enable = "avx512fp16")]
21824	unsafe fn test_mm512_rcp_ph() {
21825	let a = _mm512_set1_ph(`2.0`);
21826	let r = _mm512_rcp_ph(a);
21827	let e = _mm512_set1_ph(`0.5`);
21828	assert_eq_m512h(r, e);
21829	}
21830
21831	#[simd_test(enable = "avx512fp16")]
21832	unsafe fn test_mm512_mask_rcp_ph() {
21833	let a = _mm512_set1_ph(`2.0`);
21834	let src = _mm512_set1_ph(`1.0`);
21835	let r = _mm512_mask_rcp_ph(src, `0b01010101010101010101010101010101`, a);
21836	let e = _mm512_set_ph(
21837	`1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`,
21838	`0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`,
21839	);
21840	assert_eq_m512h(r, e);
21841	}
21842
21843	#[simd_test(enable = "avx512fp16")]
21844	unsafe fn test_mm512_maskz_rcp_ph() {
21845	let a = _mm512_set1_ph(`2.0`);
21846	let r = _mm512_maskz_rcp_ph(`0b01010101010101010101010101010101`, a);
21847	let e = _mm512_set_ph(
21848	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`,
21849	`0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
21850	);
21851	assert_eq_m512h(r, e);
21852	}
21853
21854	#[simd_test(enable = "avx512fp16")]
21855	unsafe fn test_mm_rcp_sh() {
21856	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21857	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
21858	let r = _mm_rcp_sh(a, b);
21859	let e = _mm_setr_ph(`0.5`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21860	assert_eq_m128h(r, e);
21861	}
21862
21863	#[simd_test(enable = "avx512fp16")]
21864	unsafe fn test_mm_mask_rcp_sh() {
21865	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21866	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
21867	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
21868	let r = _mm_mask_rcp_sh(src, `0`, a, b);
21869	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21870	assert_eq_m128h(r, e);
21871	let r = _mm_mask_rcp_sh(src, `1`, a, b);
21872	let e = _mm_setr_ph(`0.5`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21873	assert_eq_m128h(r, e);
21874	}
21875
21876	#[simd_test(enable = "avx512fp16")]
21877	unsafe fn test_mm_maskz_rcp_sh() {
21878	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21879	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
21880	let r = _mm_maskz_rcp_sh(`0`, a, b);
21881	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21882	assert_eq_m128h(r, e);
21883	let r = _mm_maskz_rcp_sh(`1`, a, b);
21884	let e = _mm_setr_ph(`0.5`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21885	assert_eq_m128h(r, e);
21886	}
21887
21888	#[simd_test(enable = "avx512fp16,avx512vl")]
21889	unsafe fn test_mm_rsqrt_ph() {
21890	let a = _mm_set1_ph(`4.0`);
21891	let r = _mm_rsqrt_ph(a);
21892	let e = _mm_set1_ph(`0.5`);
21893	assert_eq_m128h(r, e);
21894	}
21895
21896	#[simd_test(enable = "avx512fp16,avx512vl")]
21897	unsafe fn test_mm_mask_rsqrt_ph() {
21898	let a = _mm_set1_ph(`4.0`);
21899	let src = _mm_set1_ph(`1.0`);
21900	let r = _mm_mask_rsqrt_ph(src, `0b01010101`, a);
21901	let e = _mm_set_ph(`1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`);
21902	assert_eq_m128h(r, e);
21903	}
21904
21905	#[simd_test(enable = "avx512fp16,avx512vl")]
21906	unsafe fn test_mm_maskz_rsqrt_ph() {
21907	let a = _mm_set1_ph(`4.0`);
21908	let r = _mm_maskz_rsqrt_ph(`0b01010101`, a);
21909	let e = _mm_set_ph(`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`);
21910	assert_eq_m128h(r, e);
21911	}
21912
21913	#[simd_test(enable = "avx512fp16,avx512vl")]
21914	unsafe fn test_mm256_rsqrt_ph() {
21915	let a = _mm256_set1_ph(`4.0`);
21916	let r = _mm256_rsqrt_ph(a);
21917	let e = _mm256_set1_ph(`0.5`);
21918	assert_eq_m256h(r, e);
21919	}
21920
21921	#[simd_test(enable = "avx512fp16,avx512vl")]
21922	unsafe fn test_mm256_mask_rsqrt_ph() {
21923	let a = _mm256_set1_ph(`4.0`);
21924	let src = _mm256_set1_ph(`1.0`);
21925	let r = _mm256_mask_rsqrt_ph(src, `0b0101010101010101`, a);
21926	let e = _mm256_set_ph(
21927	`1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`,
21928	);
21929	assert_eq_m256h(r, e);
21930	}
21931
21932	#[simd_test(enable = "avx512fp16,avx512vl")]
21933	unsafe fn test_mm256_maskz_rsqrt_ph() {
21934	let a = _mm256_set1_ph(`4.0`);
21935	let r = _mm256_maskz_rsqrt_ph(`0b0101010101010101`, a);
21936	let e = _mm256_set_ph(
21937	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
21938	);
21939	assert_eq_m256h(r, e);
21940	}
21941
21942	#[simd_test(enable = "avx512fp16")]
21943	unsafe fn test_mm512_rsqrt_ph() {
21944	let a = _mm512_set1_ph(`4.0`);
21945	let r = _mm512_rsqrt_ph(a);
21946	let e = _mm512_set1_ph(`0.5`);
21947	assert_eq_m512h(r, e);
21948	}
21949
21950	#[simd_test(enable = "avx512fp16")]
21951	unsafe fn test_mm512_mask_rsqrt_ph() {
21952	let a = _mm512_set1_ph(`4.0`);
21953	let src = _mm512_set1_ph(`1.0`);
21954	let r = _mm512_mask_rsqrt_ph(src, `0b01010101010101010101010101010101`, a);
21955	let e = _mm512_set_ph(
21956	`1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`,
21957	`0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`, `1.0`, `0.5`,
21958	);
21959	assert_eq_m512h(r, e);
21960	}
21961
21962	#[simd_test(enable = "avx512fp16")]
21963	unsafe fn test_mm512_maskz_rsqrt_ph() {
21964	let a = _mm512_set1_ph(`4.0`);
21965	let r = _mm512_maskz_rsqrt_ph(`0b01010101010101010101010101010101`, a);
21966	let e = _mm512_set_ph(
21967	`0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`,
21968	`0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`, `0.0`, `0.5`,
21969	);
21970	assert_eq_m512h(r, e);
21971	}
21972
21973	#[simd_test(enable = "avx512fp16")]
21974	unsafe fn test_mm_rsqrt_sh() {
21975	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21976	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
21977	let r = _mm_rsqrt_sh(a, b);
21978	let e = _mm_setr_ph(`0.5`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21979	assert_eq_m128h(r, e);
21980	}
21981
21982	#[simd_test(enable = "avx512fp16")]
21983	unsafe fn test_mm_mask_rsqrt_sh() {
21984	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21985	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
21986	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
21987	let r = _mm_mask_rsqrt_sh(src, `0`, a, b);
21988	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21989	assert_eq_m128h(r, e);
21990	let r = _mm_mask_rsqrt_sh(src, `1`, a, b);
21991	let e = _mm_setr_ph(`0.5`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21992	assert_eq_m128h(r, e);
21993	}
21994
21995	#[simd_test(enable = "avx512fp16")]
21996	unsafe fn test_mm_maskz_rsqrt_sh() {
21997	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
21998	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
21999	let r = _mm_maskz_rsqrt_sh(`0`, a, b);
22000	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22001	assert_eq_m128h(r, e);
22002	let r = _mm_maskz_rsqrt_sh(`1`, a, b);
22003	let e = _mm_setr_ph(`0.5`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22004	assert_eq_m128h(r, e);
22005	}
22006
22007	#[simd_test(enable = "avx512fp16,avx512vl")]
22008	unsafe fn test_mm_sqrt_ph() {
22009	let a = _mm_set1_ph(`4.0`);
22010	let r = _mm_sqrt_ph(a);
22011	let e = _mm_set1_ph(`2.0`);
22012	assert_eq_m128h(r, e);
22013	}
22014
22015	#[simd_test(enable = "avx512fp16,avx512vl")]
22016	unsafe fn test_mm_mask_sqrt_ph() {
22017	let a = _mm_set1_ph(`4.0`);
22018	let src = _mm_set1_ph(`1.0`);
22019	let r = _mm_mask_sqrt_ph(src, `0b01010101`, a);
22020	let e = _mm_set_ph(`1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`);
22021	assert_eq_m128h(r, e);
22022	}
22023
22024	#[simd_test(enable = "avx512fp16,avx512vl")]
22025	unsafe fn test_mm_maskz_sqrt_ph() {
22026	let a = _mm_set1_ph(`4.0`);
22027	let r = _mm_maskz_sqrt_ph(`0b01010101`, a);
22028	let e = _mm_set_ph(`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`);
22029	assert_eq_m128h(r, e);
22030	}
22031
22032	#[simd_test(enable = "avx512fp16,avx512vl")]
22033	unsafe fn test_mm256_sqrt_ph() {
22034	let a = _mm256_set1_ph(`4.0`);
22035	let r = _mm256_sqrt_ph(a);
22036	let e = _mm256_set1_ph(`2.0`);
22037	assert_eq_m256h(r, e);
22038	}
22039
22040	#[simd_test(enable = "avx512fp16,avx512vl")]
22041	unsafe fn test_mm256_mask_sqrt_ph() {
22042	let a = _mm256_set1_ph(`4.0`);
22043	let src = _mm256_set1_ph(`1.0`);
22044	let r = _mm256_mask_sqrt_ph(src, `0b0101010101010101`, a);
22045	let e = _mm256_set_ph(
22046	`1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`,
22047	);
22048	assert_eq_m256h(r, e);
22049	}
22050
22051	#[simd_test(enable = "avx512fp16,avx512vl")]
22052	unsafe fn test_mm256_maskz_sqrt_ph() {
22053	let a = _mm256_set1_ph(`4.0`);
22054	let r = _mm256_maskz_sqrt_ph(`0b0101010101010101`, a);
22055	let e = _mm256_set_ph(
22056	`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`,
22057	);
22058	assert_eq_m256h(r, e);
22059	}
22060
22061	#[simd_test(enable = "avx512fp16")]
22062	unsafe fn test_mm512_sqrt_ph() {
22063	let a = _mm512_set1_ph(`4.0`);
22064	let r = _mm512_sqrt_ph(a);
22065	let e = _mm512_set1_ph(`2.0`);
22066	assert_eq_m512h(r, e);
22067	}
22068
22069	#[simd_test(enable = "avx512fp16")]
22070	unsafe fn test_mm512_mask_sqrt_ph() {
22071	let a = _mm512_set1_ph(`4.0`);
22072	let src = _mm512_set1_ph(`1.0`);
22073	let r = _mm512_mask_sqrt_ph(src, `0b01010101010101010101010101010101`, a);
22074	let e = _mm512_set_ph(
22075	`1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`,
22076	`2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`,
22077	);
22078	assert_eq_m512h(r, e);
22079	}
22080
22081	#[simd_test(enable = "avx512fp16")]
22082	unsafe fn test_mm512_maskz_sqrt_ph() {
22083	let a = _mm512_set1_ph(`4.0`);
22084	let r = _mm512_maskz_sqrt_ph(`0b01010101010101010101010101010101`, a);
22085	let e = _mm512_set_ph(
22086	`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`,
22087	`2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`,
22088	);
22089	assert_eq_m512h(r, e);
22090	}
22091
22092	#[simd_test(enable = "avx512fp16")]
22093	unsafe fn test_mm512_sqrt_round_ph() {
22094	let a = _mm512_set1_ph(`4.0`);
22095	let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
22096	let e = _mm512_set1_ph(`2.0`);
22097	assert_eq_m512h(r, e);
22098	}
22099
22100	#[simd_test(enable = "avx512fp16")]
22101	unsafe fn test_mm512_mask_sqrt_round_ph() {
22102	let a = _mm512_set1_ph(`4.0`);
22103	let src = _mm512_set1_ph(`1.0`);
22104	let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22105	src,
22106	`0b01010101010101010101010101010101`,
22107	a,
22108	);
22109	let e = _mm512_set_ph(
22110	`1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`,
22111	`2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`,
22112	);
22113	assert_eq_m512h(r, e);
22114	}
22115
22116	#[simd_test(enable = "avx512fp16")]
22117	unsafe fn test_mm512_maskz_sqrt_round_ph() {
22118	let a = _mm512_set1_ph(`4.0`);
22119	let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22120	`0b01010101010101010101010101010101`,
22121	a,
22122	);
22123	let e = _mm512_set_ph(
22124	`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`,
22125	`2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`,
22126	);
22127	assert_eq_m512h(r, e);
22128	}
22129
22130	#[simd_test(enable = "avx512fp16")]
22131	unsafe fn test_mm_sqrt_sh() {
22132	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22133	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
22134	let r = _mm_sqrt_sh(a, b);
22135	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22136	assert_eq_m128h(r, e);
22137	}
22138
22139	#[simd_test(enable = "avx512fp16")]
22140	unsafe fn test_mm_mask_sqrt_sh() {
22141	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22142	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
22143	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
22144	let r = _mm_mask_sqrt_sh(src, `0`, a, b);
22145	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22146	assert_eq_m128h(r, e);
22147	let r = _mm_mask_sqrt_sh(src, `1`, a, b);
22148	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22149	assert_eq_m128h(r, e);
22150	}
22151
22152	#[simd_test(enable = "avx512fp16")]
22153	unsafe fn test_mm_maskz_sqrt_sh() {
22154	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22155	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
22156	let r = _mm_maskz_sqrt_sh(`0`, a, b);
22157	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22158	assert_eq_m128h(r, e);
22159	let r = _mm_maskz_sqrt_sh(`1`, a, b);
22160	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22161	assert_eq_m128h(r, e);
22162	}
22163
22164	#[simd_test(enable = "avx512fp16")]
22165	unsafe fn test_mm_sqrt_round_sh() {
22166	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22167	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
22168	let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
22169	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22170	assert_eq_m128h(r, e);
22171	}
22172
22173	#[simd_test(enable = "avx512fp16")]
22174	unsafe fn test_mm_mask_sqrt_round_sh() {
22175	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22176	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
22177	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
22178	let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22179	src, `0`, a, b,
22180	);
22181	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22182	assert_eq_m128h(r, e);
22183	let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22184	src, `1`, a, b,
22185	);
22186	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22187	assert_eq_m128h(r, e);
22188	}
22189
22190	#[simd_test(enable = "avx512fp16")]
22191	unsafe fn test_mm_maskz_sqrt_round_sh() {
22192	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22193	let b = _mm_setr_ph(`4.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`);
22194	let r =
22195	_mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
22196	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22197	assert_eq_m128h(r, e);
22198	let r =
22199	_mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
22200	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22201	assert_eq_m128h(r, e);
22202	}
22203
22204	#[simd_test(enable = "avx512fp16,avx512vl")]
22205	unsafe fn test_mm_max_ph() {
22206	let a = _mm_set1_ph(`2.0`);
22207	let b = _mm_set1_ph(`1.0`);
22208	let r = _mm_max_ph(a, b);
22209	let e = _mm_set1_ph(`2.0`);
22210	assert_eq_m128h(r, e);
22211	}
22212
22213	#[simd_test(enable = "avx512fp16,avx512vl")]
22214	unsafe fn test_mm_mask_max_ph() {
22215	let a = _mm_set1_ph(`2.0`);
22216	let b = _mm_set1_ph(`1.0`);
22217	let src = _mm_set1_ph(`3.0`);
22218	let r = _mm_mask_max_ph(src, `0b01010101`, a, b);
22219	let e = _mm_set_ph(`3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`);
22220	assert_eq_m128h(r, e);
22221	}
22222
22223	#[simd_test(enable = "avx512fp16,avx512vl")]
22224	unsafe fn test_mm_maskz_max_ph() {
22225	let a = _mm_set1_ph(`2.0`);
22226	let b = _mm_set1_ph(`1.0`);
22227	let r = _mm_maskz_max_ph(`0b01010101`, a, b);
22228	let e = _mm_set_ph(`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`);
22229	assert_eq_m128h(r, e);
22230	}
22231
22232	#[simd_test(enable = "avx512fp16,avx512vl")]
22233	unsafe fn test_mm256_max_ph() {
22234	let a = _mm256_set1_ph(`2.0`);
22235	let b = _mm256_set1_ph(`1.0`);
22236	let r = _mm256_max_ph(a, b);
22237	let e = _mm256_set1_ph(`2.0`);
22238	assert_eq_m256h(r, e);
22239	}
22240
22241	#[simd_test(enable = "avx512fp16,avx512vl")]
22242	unsafe fn test_mm256_mask_max_ph() {
22243	let a = _mm256_set1_ph(`2.0`);
22244	let b = _mm256_set1_ph(`1.0`);
22245	let src = _mm256_set1_ph(`3.0`);
22246	let r = _mm256_mask_max_ph(src, `0b0101010101010101`, a, b);
22247	let e = _mm256_set_ph(
22248	`3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`,
22249	);
22250	assert_eq_m256h(r, e);
22251	}
22252
22253	#[simd_test(enable = "avx512fp16,avx512vl")]
22254	unsafe fn test_mm256_maskz_max_ph() {
22255	let a = _mm256_set1_ph(`2.0`);
22256	let b = _mm256_set1_ph(`1.0`);
22257	let r = _mm256_maskz_max_ph(`0b0101010101010101`, a, b);
22258	let e = _mm256_set_ph(
22259	`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`,
22260	);
22261	assert_eq_m256h(r, e);
22262	}
22263
22264	#[simd_test(enable = "avx512fp16")]
22265	unsafe fn test_mm512_max_ph() {
22266	let a = _mm512_set1_ph(`2.0`);
22267	let b = _mm512_set1_ph(`1.0`);
22268	let r = _mm512_max_ph(a, b);
22269	let e = _mm512_set1_ph(`2.0`);
22270	assert_eq_m512h(r, e);
22271	}
22272
22273	#[simd_test(enable = "avx512fp16")]
22274	unsafe fn test_mm512_mask_max_ph() {
22275	let a = _mm512_set1_ph(`2.0`);
22276	let b = _mm512_set1_ph(`1.0`);
22277	let src = _mm512_set1_ph(`3.0`);
22278	let r = _mm512_mask_max_ph(src, `0b01010101010101010101010101010101`, a, b);
22279	let e = _mm512_set_ph(
22280	`3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`,
22281	`2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`,
22282	);
22283	assert_eq_m512h(r, e);
22284	}
22285
22286	#[simd_test(enable = "avx512fp16")]
22287	unsafe fn test_mm512_maskz_max_ph() {
22288	let a = _mm512_set1_ph(`2.0`);
22289	let b = _mm512_set1_ph(`1.0`);
22290	let r = _mm512_maskz_max_ph(`0b01010101010101010101010101010101`, a, b);
22291	let e = _mm512_set_ph(
22292	`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`,
22293	`2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`,
22294	);
22295	assert_eq_m512h(r, e);
22296	}
22297
22298	#[simd_test(enable = "avx512fp16")]
22299	unsafe fn test_mm512_max_round_ph() {
22300	let a = _mm512_set1_ph(`2.0`);
22301	let b = _mm512_set1_ph(`1.0`);
22302	let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
22303	let e = _mm512_set1_ph(`2.0`);
22304	assert_eq_m512h(r, e);
22305	}
22306
22307	#[simd_test(enable = "avx512fp16")]
22308	unsafe fn test_mm512_mask_max_round_ph() {
22309	let a = _mm512_set1_ph(`2.0`);
22310	let b = _mm512_set1_ph(`1.0`);
22311	let src = _mm512_set1_ph(`3.0`);
22312	let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22313	src,
22314	`0b01010101010101010101010101010101`,
22315	a,
22316	b,
22317	);
22318	let e = _mm512_set_ph(
22319	`3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`,
22320	`2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`, `3.0`, `2.0`,
22321	);
22322	assert_eq_m512h(r, e);
22323	}
22324
22325	#[simd_test(enable = "avx512fp16")]
22326	unsafe fn test_mm512_maskz_max_round_ph() {
22327	let a = _mm512_set1_ph(`2.0`);
22328	let b = _mm512_set1_ph(`1.0`);
22329	let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22330	`0b01010101010101010101010101010101`,
22331	a,
22332	b,
22333	);
22334	let e = _mm512_set_ph(
22335	`0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`,
22336	`2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`, `0.0`, `2.0`,
22337	);
22338	assert_eq_m512h(r, e);
22339	}
22340
22341	#[simd_test(enable = "avx512fp16")]
22342	unsafe fn test_mm_max_sh() {
22343	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22344	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22345	let r = _mm_max_sh(a, b);
22346	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22347	assert_eq_m128h(r, e);
22348	}
22349
22350	#[simd_test(enable = "avx512fp16")]
22351	unsafe fn test_mm_mask_max_sh() {
22352	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22353	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22354	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
22355	let r = _mm_mask_max_sh(src, `0`, a, b);
22356	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22357	assert_eq_m128h(r, e);
22358	let r = _mm_mask_max_sh(src, `1`, a, b);
22359	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22360	assert_eq_m128h(r, e);
22361	}
22362
22363	#[simd_test(enable = "avx512fp16")]
22364	unsafe fn test_mm_maskz_max_sh() {
22365	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22366	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22367	let r = _mm_maskz_max_sh(`0`, a, b);
22368	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22369	assert_eq_m128h(r, e);
22370	let r = _mm_maskz_max_sh(`1`, a, b);
22371	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22372	assert_eq_m128h(r, e);
22373	}
22374
22375	#[simd_test(enable = "avx512fp16")]
22376	unsafe fn test_mm_max_round_sh() {
22377	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22378	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22379	let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
22380	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22381	assert_eq_m128h(r, e);
22382	}
22383
22384	#[simd_test(enable = "avx512fp16")]
22385	unsafe fn test_mm_mask_max_round_sh() {
22386	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22387	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22388	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
22389	let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22390	src, `0`, a, b,
22391	);
22392	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22393	assert_eq_m128h(r, e);
22394	let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22395	src, `1`, a, b,
22396	);
22397	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22398	assert_eq_m128h(r, e);
22399	}
22400
22401	#[simd_test(enable = "avx512fp16")]
22402	unsafe fn test_mm_maskz_max_round_sh() {
22403	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22404	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22405	let r =
22406	_mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
22407	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22408	assert_eq_m128h(r, e);
22409	let r =
22410	_mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
22411	let e = _mm_setr_ph(`2.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22412	assert_eq_m128h(r, e);
22413	}
22414
22415	#[simd_test(enable = "avx512fp16,avx512vl")]
22416	unsafe fn test_mm_min_ph() {
22417	let a = _mm_set1_ph(`2.0`);
22418	let b = _mm_set1_ph(`1.0`);
22419	let r = _mm_min_ph(a, b);
22420	let e = _mm_set1_ph(`1.0`);
22421	assert_eq_m128h(r, e);
22422	}
22423
22424	#[simd_test(enable = "avx512fp16,avx512vl")]
22425	unsafe fn test_mm_mask_min_ph() {
22426	let a = _mm_set1_ph(`2.0`);
22427	let b = _mm_set1_ph(`1.0`);
22428	let src = _mm_set1_ph(`3.0`);
22429	let r = _mm_mask_min_ph(src, `0b01010101`, a, b);
22430	let e = _mm_set_ph(`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`);
22431	assert_eq_m128h(r, e);
22432	}
22433
22434	#[simd_test(enable = "avx512fp16,avx512vl")]
22435	unsafe fn test_mm_maskz_min_ph() {
22436	let a = _mm_set1_ph(`2.0`);
22437	let b = _mm_set1_ph(`1.0`);
22438	let r = _mm_maskz_min_ph(`0b01010101`, a, b);
22439	let e = _mm_set_ph(`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`);
22440	assert_eq_m128h(r, e);
22441	}
22442
22443	#[simd_test(enable = "avx512fp16,avx512vl")]
22444	unsafe fn test_mm256_min_ph() {
22445	let a = _mm256_set1_ph(`2.0`);
22446	let b = _mm256_set1_ph(`1.0`);
22447	let r = _mm256_min_ph(a, b);
22448	let e = _mm256_set1_ph(`1.0`);
22449	assert_eq_m256h(r, e);
22450	}
22451
22452	#[simd_test(enable = "avx512fp16,avx512vl")]
22453	unsafe fn test_mm256_mask_min_ph() {
22454	let a = _mm256_set1_ph(`2.0`);
22455	let b = _mm256_set1_ph(`1.0`);
22456	let src = _mm256_set1_ph(`3.0`);
22457	let r = _mm256_mask_min_ph(src, `0b0101010101010101`, a, b);
22458	let e = _mm256_set_ph(
22459	`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`,
22460	);
22461	assert_eq_m256h(r, e);
22462	}
22463
22464	#[simd_test(enable = "avx512fp16,avx512vl")]
22465	unsafe fn test_mm256_maskz_min_ph() {
22466	let a = _mm256_set1_ph(`2.0`);
22467	let b = _mm256_set1_ph(`1.0`);
22468	let r = _mm256_maskz_min_ph(`0b0101010101010101`, a, b);
22469	let e = _mm256_set_ph(
22470	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
22471	);
22472	assert_eq_m256h(r, e);
22473	}
22474
22475	#[simd_test(enable = "avx512fp16")]
22476	unsafe fn test_mm512_min_ph() {
22477	let a = _mm512_set1_ph(`2.0`);
22478	let b = _mm512_set1_ph(`1.0`);
22479	let r = _mm512_min_ph(a, b);
22480	let e = _mm512_set1_ph(`1.0`);
22481	assert_eq_m512h(r, e);
22482	}
22483
22484	#[simd_test(enable = "avx512fp16")]
22485	unsafe fn test_mm512_mask_min_ph() {
22486	let a = _mm512_set1_ph(`2.0`);
22487	let b = _mm512_set1_ph(`1.0`);
22488	let src = _mm512_set1_ph(`3.0`);
22489	let r = _mm512_mask_min_ph(src, `0b01010101010101010101010101010101`, a, b);
22490	let e = _mm512_set_ph(
22491	`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`,
22492	`1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`,
22493	);
22494	assert_eq_m512h(r, e);
22495	}
22496
22497	#[simd_test(enable = "avx512fp16")]
22498	unsafe fn test_mm512_maskz_min_ph() {
22499	let a = _mm512_set1_ph(`2.0`);
22500	let b = _mm512_set1_ph(`1.0`);
22501	let r = _mm512_maskz_min_ph(`0b01010101010101010101010101010101`, a, b);
22502	let e = _mm512_set_ph(
22503	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
22504	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
22505	);
22506	assert_eq_m512h(r, e);
22507	}
22508
22509	#[simd_test(enable = "avx512fp16")]
22510	unsafe fn test_mm512_min_round_ph() {
22511	let a = _mm512_set1_ph(`2.0`);
22512	let b = _mm512_set1_ph(`1.0`);
22513	let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
22514	let e = _mm512_set1_ph(`1.0`);
22515	assert_eq_m512h(r, e);
22516	}
22517
22518	#[simd_test(enable = "avx512fp16")]
22519	unsafe fn test_mm512_mask_min_round_ph() {
22520	let a = _mm512_set1_ph(`2.0`);
22521	let b = _mm512_set1_ph(`1.0`);
22522	let src = _mm512_set1_ph(`3.0`);
22523	let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22524	src,
22525	`0b01010101010101010101010101010101`,
22526	a,
22527	b,
22528	);
22529	let e = _mm512_set_ph(
22530	`3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`,
22531	`1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`, `3.0`, `1.0`,
22532	);
22533	assert_eq_m512h(r, e);
22534	}
22535
22536	#[simd_test(enable = "avx512fp16")]
22537	unsafe fn test_mm512_maskz_min_round_ph() {
22538	let a = _mm512_set1_ph(`2.0`);
22539	let b = _mm512_set1_ph(`1.0`);
22540	let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22541	`0b01010101010101010101010101010101`,
22542	a,
22543	b,
22544	);
22545	let e = _mm512_set_ph(
22546	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
22547	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
22548	);
22549	assert_eq_m512h(r, e);
22550	}
22551
22552	#[simd_test(enable = "avx512fp16")]
22553	unsafe fn test_mm_min_sh() {
22554	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22555	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22556	let r = _mm_min_sh(a, b);
22557	let e = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22558	assert_eq_m128h(r, e);
22559	}
22560
22561	#[simd_test(enable = "avx512fp16")]
22562	unsafe fn test_mm_mask_min_sh() {
22563	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22564	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22565	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
22566	let r = _mm_mask_min_sh(src, `0`, a, b);
22567	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22568	assert_eq_m128h(r, e);
22569	let r = _mm_mask_min_sh(src, `1`, a, b);
22570	let e = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22571	assert_eq_m128h(r, e);
22572	}
22573
22574	#[simd_test(enable = "avx512fp16")]
22575	unsafe fn test_mm_maskz_min_sh() {
22576	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22577	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22578	let r = _mm_maskz_min_sh(`0`, a, b);
22579	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22580	assert_eq_m128h(r, e);
22581	let r = _mm_maskz_min_sh(`1`, a, b);
22582	let e = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22583	assert_eq_m128h(r, e);
22584	}
22585
22586	#[simd_test(enable = "avx512fp16")]
22587	unsafe fn test_mm_min_round_sh() {
22588	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22589	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22590	let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
22591	let e = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22592	assert_eq_m128h(r, e);
22593	}
22594
22595	#[simd_test(enable = "avx512fp16")]
22596	unsafe fn test_mm_mask_min_round_sh() {
22597	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22598	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22599	let src = _mm_setr_ph(`3.0`, `30.0`, `31.0`, `32.0`, `33.0`, `34.0`, `35.0`, `36.0`);
22600	let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22601	src, `0`, a, b,
22602	);
22603	let e = _mm_setr_ph(`3.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22604	assert_eq_m128h(r, e);
22605	let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
22606	src, `1`, a, b,
22607	);
22608	let e = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22609	assert_eq_m128h(r, e);
22610	}
22611
22612	#[simd_test(enable = "avx512fp16")]
22613	unsafe fn test_mm_maskz_min_round_sh() {
22614	let a = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22615	let b = _mm_setr_ph(`2.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`);
22616	let r =
22617	_mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
22618	let e = _mm_setr_ph(`0.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22619	assert_eq_m128h(r, e);
22620	let r =
22621	_mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
22622	let e = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
22623	assert_eq_m128h(r, e);
22624	}
22625
22626	#[simd_test(enable = "avx512fp16,avx512vl")]
22627	unsafe fn test_mm_getexp_ph() {
22628	let a = _mm_set1_ph(`3.0`);
22629	let r = _mm_getexp_ph(a);
22630	let e = _mm_set1_ph(`1.0`);
22631	assert_eq_m128h(r, e);
22632	}
22633
22634	#[simd_test(enable = "avx512fp16,avx512vl")]
22635	unsafe fn test_mm_mask_getexp_ph() {
22636	let a = _mm_set1_ph(`3.0`);
22637	let src = _mm_set1_ph(`4.0`);
22638	let r = _mm_mask_getexp_ph(src, `0b01010101`, a);
22639	let e = _mm_set_ph(`4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`);
22640	assert_eq_m128h(r, e);
22641	}
22642
22643	#[simd_test(enable = "avx512fp16,avx512vl")]
22644	unsafe fn test_mm_maskz_getexp_ph() {
22645	let a = _mm_set1_ph(`3.0`);
22646	let r = _mm_maskz_getexp_ph(`0b01010101`, a);
22647	let e = _mm_set_ph(`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`);
22648	assert_eq_m128h(r, e);
22649	}
22650
22651	#[simd_test(enable = "avx512fp16,avx512vl")]
22652	unsafe fn test_mm256_getexp_ph() {
22653	let a = _mm256_set1_ph(`3.0`);
22654	let r = _mm256_getexp_ph(a);
22655	let e = _mm256_set1_ph(`1.0`);
22656	assert_eq_m256h(r, e);
22657	}
22658
22659	#[simd_test(enable = "avx512fp16,avx512vl")]
22660	unsafe fn test_mm256_mask_getexp_ph() {
22661	let a = _mm256_set1_ph(`3.0`);
22662	let src = _mm256_set1_ph(`4.0`);
22663	let r = _mm256_mask_getexp_ph(src, `0b0101010101010101`, a);
22664	let e = _mm256_set_ph(
22665	`4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`,
22666	);
22667	assert_eq_m256h(r, e);
22668	}
22669
22670	#[simd_test(enable = "avx512fp16,avx512vl")]
22671	unsafe fn test_mm256_maskz_getexp_ph() {
22672	let a = _mm256_set1_ph(`3.0`);
22673	let r = _mm256_maskz_getexp_ph(`0b0101010101010101`, a);
22674	let e = _mm256_set_ph(
22675	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
22676	);
22677	assert_eq_m256h(r, e);
22678	}
22679
22680	#[simd_test(enable = "avx512fp16")]
22681	unsafe fn test_mm512_getexp_ph() {
22682	let a = _mm512_set1_ph(`3.0`);
22683	let r = _mm512_getexp_ph(a);
22684	let e = _mm512_set1_ph(`1.0`);
22685	assert_eq_m512h(r, e);
22686	}
22687
22688	#[simd_test(enable = "avx512fp16")]
22689	unsafe fn test_mm512_mask_getexp_ph() {
22690	let a = _mm512_set1_ph(`3.0`);
22691	let src = _mm512_set1_ph(`4.0`);
22692	let r = _mm512_mask_getexp_ph(src, `0b01010101010101010101010101010101`, a);
22693	let e = _mm512_set_ph(
22694	`4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`,
22695	`1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`,
22696	);
22697	assert_eq_m512h(r, e);
22698	}
22699
22700	#[simd_test(enable = "avx512fp16")]
22701	unsafe fn test_mm512_maskz_getexp_ph() {
22702	let a = _mm512_set1_ph(`3.0`);
22703	let r = _mm512_maskz_getexp_ph(`0b01010101010101010101010101010101`, a);
22704	let e = _mm512_set_ph(
22705	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
22706	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
22707	);
22708	assert_eq_m512h(r, e);
22709	}
22710
22711	#[simd_test(enable = "avx512fp16")]
22712	unsafe fn test_mm512_getexp_round_ph() {
22713	let a = _mm512_set1_ph(`3.0`);
22714	let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
22715	let e = _mm512_set1_ph(`1.0`);
22716	assert_eq_m512h(r, e);
22717	}
22718
22719	#[simd_test(enable = "avx512fp16")]
22720	unsafe fn test_mm512_mask_getexp_round_ph() {
22721	let a = _mm512_set1_ph(`3.0`);
22722	let src = _mm512_set1_ph(`4.0`);
22723	let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22724	src,
22725	`0b01010101010101010101010101010101`,
22726	a,
22727	);
22728	let e = _mm512_set_ph(
22729	`4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`,
22730	`1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`, `4.0`, `1.0`,
22731	);
22732	assert_eq_m512h(r, e);
22733	}
22734
22735	#[simd_test(enable = "avx512fp16")]
22736	unsafe fn test_mm512_maskz_getexp_round_ph() {
22737	let a = _mm512_set1_ph(`3.0`);
22738	let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22739	`0b01010101010101010101010101010101`,
22740	a,
22741	);
22742	let e = _mm512_set_ph(
22743	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
22744	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
22745	);
22746	assert_eq_m512h(r, e);
22747	}
22748
22749	#[simd_test(enable = "avx512fp16")]
22750	unsafe fn test_mm_getexp_sh() {
22751	let a = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22752	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22753	let r = _mm_getexp_sh(a, b);
22754	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22755	assert_eq_m128h(r, e);
22756	}
22757
22758	#[simd_test(enable = "avx512fp16")]
22759	unsafe fn test_mm_mask_getexp_sh() {
22760	let a = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22761	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22762	let src = _mm_setr_ph(`4.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
22763	let r = _mm_mask_getexp_sh(src, `0`, a, b);
22764	let e = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22765	assert_eq_m128h(r, e);
22766	let r = _mm_mask_getexp_sh(src, `1`, a, b);
22767	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22768	assert_eq_m128h(r, e);
22769	}
22770
22771	#[simd_test(enable = "avx512fp16")]
22772	unsafe fn test_mm_maskz_getexp_sh() {
22773	let a = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22774	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22775	let r = _mm_maskz_getexp_sh(`0`, a, b);
22776	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22777	assert_eq_m128h(r, e);
22778	let r = _mm_maskz_getexp_sh(`1`, a, b);
22779	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22780	assert_eq_m128h(r, e);
22781	}
22782
22783	#[simd_test(enable = "avx512fp16")]
22784	unsafe fn test_mm_getexp_round_sh() {
22785	let a = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22786	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22787	let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
22788	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22789	assert_eq_m128h(r, e);
22790	}
22791
22792	#[simd_test(enable = "avx512fp16")]
22793	unsafe fn test_mm_mask_getexp_round_sh() {
22794	let a = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22795	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22796	let src = _mm_setr_ph(`4.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
22797	let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, `0`, a, b);
22798	let e = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22799	assert_eq_m128h(r, e);
22800	let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, `1`, a, b);
22801	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22802	assert_eq_m128h(r, e);
22803	}
22804
22805	#[simd_test(enable = "avx512fp16")]
22806	unsafe fn test_mm_maskz_getexp_round_sh() {
22807	let a = _mm_setr_ph(`4.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22808	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22809	let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(`0`, a, b);
22810	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22811	assert_eq_m128h(r, e);
22812	let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(`1`, a, b);
22813	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22814	assert_eq_m128h(r, e);
22815	}
22816
22817	#[simd_test(enable = "avx512fp16,avx512vl")]
22818	unsafe fn test_mm_getmant_ph() {
22819	let a = _mm_set1_ph(`10.0`);
22820	let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22821	let e = _mm_set1_ph(`1.25`);
22822	assert_eq_m128h(r, e);
22823	}
22824
22825	#[simd_test(enable = "avx512fp16,avx512vl")]
22826	unsafe fn test_mm_mask_getmant_ph() {
22827	let a = _mm_set1_ph(`10.0`);
22828	let src = _mm_set1_ph(`20.0`);
22829	let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, `0b01010101`, a);
22830	let e = _mm_set_ph(`20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`);
22831	assert_eq_m128h(r, e);
22832	}
22833
22834	#[simd_test(enable = "avx512fp16,avx512vl")]
22835	unsafe fn test_mm_maskz_getmant_ph() {
22836	let a = _mm_set1_ph(`10.0`);
22837	let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(`0b01010101`, a);
22838	let e = _mm_set_ph(`0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`);
22839	assert_eq_m128h(r, e);
22840	}
22841
22842	#[simd_test(enable = "avx512fp16,avx512vl")]
22843	unsafe fn test_mm256_getmant_ph() {
22844	let a = _mm256_set1_ph(`10.0`);
22845	let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22846	let e = _mm256_set1_ph(`1.25`);
22847	assert_eq_m256h(r, e);
22848	}
22849
22850	#[simd_test(enable = "avx512fp16,avx512vl")]
22851	unsafe fn test_mm256_mask_getmant_ph() {
22852	let a = _mm256_set1_ph(`10.0`);
22853	let src = _mm256_set1_ph(`20.0`);
22854	let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22855	src,
22856	`0b0101010101010101`,
22857	a,
22858	);
22859	let e = _mm256_set_ph(
22860	`20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`,
22861	`20.0`, `1.25`,
22862	);
22863	assert_eq_m256h(r, e);
22864	}
22865
22866	#[simd_test(enable = "avx512fp16,avx512vl")]
22867	unsafe fn test_mm256_maskz_getmant_ph() {
22868	let a = _mm256_set1_ph(`10.0`);
22869	let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22870	`0b0101010101010101`,
22871	a,
22872	);
22873	let e = _mm256_set_ph(
22874	`0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`,
22875	);
22876	assert_eq_m256h(r, e);
22877	}
22878
22879	#[simd_test(enable = "avx512fp16")]
22880	unsafe fn test_mm512_getmant_ph() {
22881	let a = _mm512_set1_ph(`10.0`);
22882	let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22883	let e = _mm512_set1_ph(`1.25`);
22884	assert_eq_m512h(r, e);
22885	}
22886
22887	#[simd_test(enable = "avx512fp16")]
22888	unsafe fn test_mm512_mask_getmant_ph() {
22889	let a = _mm512_set1_ph(`10.0`);
22890	let src = _mm512_set1_ph(`20.0`);
22891	let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22892	src,
22893	`0b01010101010101010101010101010101`,
22894	a,
22895	);
22896	let e = _mm512_set_ph(
22897	`20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`,
22898	`20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`,
22899	`20.0`, `1.25`, `20.0`, `1.25`,
22900	);
22901	assert_eq_m512h(r, e);
22902	}
22903
22904	#[simd_test(enable = "avx512fp16")]
22905	unsafe fn test_mm512_maskz_getmant_ph() {
22906	let a = _mm512_set1_ph(`10.0`);
22907	let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22908	`0b01010101010101010101010101010101`,
22909	a,
22910	);
22911	let e = _mm512_set_ph(
22912	`0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`,
22913	`0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`,
22914	);
22915	assert_eq_m512h(r, e);
22916	}
22917
22918	#[simd_test(enable = "avx512fp16")]
22919	unsafe fn test_mm512_getmant_round_ph() {
22920	let a = _mm512_set1_ph(`10.0`);
22921	let r =
22922	_mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
22923	a,
22924	);
22925	let e = _mm512_set1_ph(`1.25`);
22926	assert_eq_m512h(r, e);
22927	}
22928
22929	#[simd_test(enable = "avx512fp16")]
22930	unsafe fn test_mm512_mask_getmant_round_ph() {
22931	let a = _mm512_set1_ph(`10.0`);
22932	let src = _mm512_set1_ph(`20.0`);
22933	let r = _mm512_mask_getmant_round_ph::<
22934	_MM_MANT_NORM_P75_1P5,
22935	_MM_MANT_SIGN_NAN,
22936	_MM_FROUND_NO_EXC,
22937	>(src, `0b01010101010101010101010101010101`, a);
22938	let e = _mm512_set_ph(
22939	`20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`,
22940	`20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`, `20.0`, `1.25`,
22941	`20.0`, `1.25`, `20.0`, `1.25`,
22942	);
22943	assert_eq_m512h(r, e);
22944	}
22945
22946	#[simd_test(enable = "avx512fp16")]
22947	unsafe fn test_mm512_maskz_getmant_round_ph() {
22948	let a = _mm512_set1_ph(`10.0`);
22949	let r = _mm512_maskz_getmant_round_ph::<
22950	_MM_MANT_NORM_P75_1P5,
22951	_MM_MANT_SIGN_NAN,
22952	_MM_FROUND_NO_EXC,
22953	>(`0b01010101010101010101010101010101`, a);
22954	let e = _mm512_set_ph(
22955	`0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`,
22956	`0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`, `0.0`, `1.25`,
22957	);
22958	assert_eq_m512h(r, e);
22959	}
22960
22961	#[simd_test(enable = "avx512fp16")]
22962	unsafe fn test_mm_getmant_sh() {
22963	let a = _mm_setr_ph(`15.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22964	let b = _mm_setr_ph(`10.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22965	let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
22966	let e = _mm_setr_ph(`1.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22967	assert_eq_m128h(r, e);
22968	}
22969
22970	#[simd_test(enable = "avx512fp16")]
22971	unsafe fn test_mm_mask_getmant_sh() {
22972	let a = _mm_setr_ph(`15.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22973	let b = _mm_setr_ph(`10.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22974	let src = _mm_setr_ph(`20.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
22975	let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, `0`, a, b);
22976	let e = _mm_setr_ph(`20.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22977	assert_eq_m128h(r, e);
22978	let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, `1`, a, b);
22979	let e = _mm_setr_ph(`1.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22980	assert_eq_m128h(r, e);
22981	}
22982
22983	#[simd_test(enable = "avx512fp16")]
22984	unsafe fn test_mm_maskz_getmant_sh() {
22985	let a = _mm_setr_ph(`15.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22986	let b = _mm_setr_ph(`10.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22987	let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(`0`, a, b);
22988	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22989	assert_eq_m128h(r, e);
22990	let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(`1`, a, b);
22991	let e = _mm_setr_ph(`1.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22992	assert_eq_m128h(r, e);
22993	}
22994
22995	#[simd_test(enable = "avx512fp16")]
22996	unsafe fn test_mm_getmant_round_sh() {
22997	let a = _mm_setr_ph(`15.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
22998	let b = _mm_setr_ph(`10.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
22999	let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23000	a, b,
23001	);
23002	let e = _mm_setr_ph(`1.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23003	assert_eq_m128h(r, e);
23004	}
23005
23006	#[simd_test(enable = "avx512fp16")]
23007	unsafe fn test_mm_mask_getmant_round_sh() {
23008	let a = _mm_setr_ph(`15.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23009	let b = _mm_setr_ph(`10.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23010	let src = _mm_setr_ph(`20.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23011	let r = _mm_mask_getmant_round_sh::<
23012	_MM_MANT_NORM_P75_1P5,
23013	_MM_MANT_SIGN_NAN,
23014	_MM_FROUND_NO_EXC,
23015	>(src, `0`, a, b);
23016	let e = _mm_setr_ph(`20.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23017	assert_eq_m128h(r, e);
23018	let r = _mm_mask_getmant_round_sh::<
23019	_MM_MANT_NORM_P75_1P5,
23020	_MM_MANT_SIGN_NAN,
23021	_MM_FROUND_NO_EXC,
23022	>(src, `1`, a, b);
23023	let e = _mm_setr_ph(`1.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23024	assert_eq_m128h(r, e);
23025	}
23026
23027	#[simd_test(enable = "avx512fp16")]
23028	unsafe fn test_mm_maskz_getmant_round_sh() {
23029	let a = _mm_setr_ph(`15.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23030	let b = _mm_setr_ph(`10.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23031	let r = _mm_maskz_getmant_round_sh::<
23032	_MM_MANT_NORM_P75_1P5,
23033	_MM_MANT_SIGN_NAN,
23034	_MM_FROUND_NO_EXC,
23035	>(`0`, a, b);
23036	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23037	assert_eq_m128h(r, e);
23038	let r = _mm_maskz_getmant_round_sh::<
23039	_MM_MANT_NORM_P75_1P5,
23040	_MM_MANT_SIGN_NAN,
23041	_MM_FROUND_NO_EXC,
23042	>(`1`, a, b);
23043	let e = _mm_setr_ph(`1.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23044	assert_eq_m128h(r, e);
23045	}
23046
23047	#[simd_test(enable = "avx512fp16,avx512vl")]
23048	unsafe fn test_mm_roundscale_ph() {
23049	let a = _mm_set1_ph(`1.1`);
23050	let r = _mm_roundscale_ph::<`0`>(a);
23051	let e = _mm_set1_ph(`1.0`);
23052	assert_eq_m128h(r, e);
23053	}
23054
23055	#[simd_test(enable = "avx512fp16,avx512vl")]
23056	unsafe fn test_mm_mask_roundscale_ph() {
23057	let a = _mm_set1_ph(`1.1`);
23058	let src = _mm_set1_ph(`2.0`);
23059	let r = _mm_mask_roundscale_ph::<`0`>(src, `0b01010101`, a);
23060	let e = _mm_set_ph(`2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`);
23061	assert_eq_m128h(r, e);
23062	}
23063
23064	#[simd_test(enable = "avx512fp16,avx512vl")]
23065	unsafe fn test_mm_maskz_roundscale_ph() {
23066	let a = _mm_set1_ph(`1.1`);
23067	let r = _mm_maskz_roundscale_ph::<`0`>(`0b01010101`, a);
23068	let e = _mm_set_ph(`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`);
23069	assert_eq_m128h(r, e);
23070	}
23071
23072	#[simd_test(enable = "avx512fp16,avx512vl")]
23073	unsafe fn test_mm256_roundscale_ph() {
23074	let a = _mm256_set1_ph(`1.1`);
23075	let r = _mm256_roundscale_ph::<`0`>(a);
23076	let e = _mm256_set1_ph(`1.0`);
23077	assert_eq_m256h(r, e);
23078	}
23079
23080	#[simd_test(enable = "avx512fp16,avx512vl")]
23081	unsafe fn test_mm256_mask_roundscale_ph() {
23082	let a = _mm256_set1_ph(`1.1`);
23083	let src = _mm256_set1_ph(`2.0`);
23084	let r = _mm256_mask_roundscale_ph::<`0`>(src, `0b0101010101010101`, a);
23085	let e = _mm256_set_ph(
23086	`2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`,
23087	);
23088	assert_eq_m256h(r, e);
23089	}
23090
23091	#[simd_test(enable = "avx512fp16,avx512vl")]
23092	unsafe fn test_mm256_maskz_roundscale_ph() {
23093	let a = _mm256_set1_ph(`1.1`);
23094	let r = _mm256_maskz_roundscale_ph::<`0`>(`0b0101010101010101`, a);
23095	let e = _mm256_set_ph(
23096	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
23097	);
23098	assert_eq_m256h(r, e);
23099	}
23100
23101	#[simd_test(enable = "avx512fp16")]
23102	unsafe fn test_mm512_roundscale_ph() {
23103	let a = _mm512_set1_ph(`1.1`);
23104	let r = _mm512_roundscale_ph::<`0`>(a);
23105	let e = _mm512_set1_ph(`1.0`);
23106	assert_eq_m512h(r, e);
23107	}
23108
23109	#[simd_test(enable = "avx512fp16")]
23110	unsafe fn test_mm512_mask_roundscale_ph() {
23111	let a = _mm512_set1_ph(`1.1`);
23112	let src = _mm512_set1_ph(`2.0`);
23113	let r = _mm512_mask_roundscale_ph::<`0`>(src, `0b01010101010101010101010101010101`, a);
23114	let e = _mm512_set_ph(
23115	`2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`,
23116	`1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`,
23117	);
23118	assert_eq_m512h(r, e);
23119	}
23120
23121	#[simd_test(enable = "avx512fp16")]
23122	unsafe fn test_mm512_maskz_roundscale_ph() {
23123	let a = _mm512_set1_ph(`1.1`);
23124	let r = _mm512_maskz_roundscale_ph::<`0`>(`0b01010101010101010101010101010101`, a);
23125	let e = _mm512_set_ph(
23126	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
23127	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
23128	);
23129	assert_eq_m512h(r, e);
23130	}
23131
23132	#[simd_test(enable = "avx512fp16")]
23133	unsafe fn test_mm512_roundscale_round_ph() {
23134	let a = _mm512_set1_ph(`1.1`);
23135	let r = _mm512_roundscale_round_ph::<`0`, _MM_FROUND_NO_EXC>(a);
23136	let e = _mm512_set1_ph(`1.0`);
23137	assert_eq_m512h(r, e);
23138	}
23139
23140	#[simd_test(enable = "avx512fp16")]
23141	unsafe fn test_mm512_mask_roundscale_round_ph() {
23142	let a = _mm512_set1_ph(`1.1`);
23143	let src = _mm512_set1_ph(`2.0`);
23144	let r = _mm512_mask_roundscale_round_ph::<`0`, _MM_FROUND_NO_EXC>(
23145	src,
23146	`0b01010101010101010101010101010101`,
23147	a,
23148	);
23149	let e = _mm512_set_ph(
23150	`2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`,
23151	`1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`, `2.0`, `1.0`,
23152	);
23153	assert_eq_m512h(r, e);
23154	}
23155
23156	#[simd_test(enable = "avx512fp16")]
23157	unsafe fn test_mm512_maskz_roundscale_round_ph() {
23158	let a = _mm512_set1_ph(`1.1`);
23159	let r = _mm512_maskz_roundscale_round_ph::<`0`, _MM_FROUND_NO_EXC>(
23160	`0b01010101010101010101010101010101`,
23161	a,
23162	);
23163	let e = _mm512_set_ph(
23164	`0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`,
23165	`1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`, `0.0`, `1.0`,
23166	);
23167	assert_eq_m512h(r, e);
23168	}
23169
23170	#[simd_test(enable = "avx512fp16")]
23171	unsafe fn test_mm_roundscale_sh() {
23172	let a = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23173	let b = _mm_setr_ph(`1.1`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23174	let r = _mm_roundscale_sh::<`0`>(a, b);
23175	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23176	assert_eq_m128h(r, e);
23177	}
23178
23179	#[simd_test(enable = "avx512fp16")]
23180	unsafe fn test_mm_mask_roundscale_sh() {
23181	let a = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23182	let b = _mm_setr_ph(`1.1`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23183	let src = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23184	let r = _mm_mask_roundscale_sh::<`0`>(src, `0`, a, b);
23185	let e = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23186	assert_eq_m128h(r, e);
23187	let r = _mm_mask_roundscale_sh::<`0`>(src, `1`, a, b);
23188	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23189	assert_eq_m128h(r, e);
23190	}
23191
23192	#[simd_test(enable = "avx512fp16")]
23193	unsafe fn test_mm_maskz_roundscale_sh() {
23194	let a = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23195	let b = _mm_setr_ph(`1.1`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23196	let r = _mm_maskz_roundscale_sh::<`0`>(`0`, a, b);
23197	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23198	assert_eq_m128h(r, e);
23199	let r = _mm_maskz_roundscale_sh::<`0`>(`1`, a, b);
23200	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23201	assert_eq_m128h(r, e);
23202	}
23203
23204	#[simd_test(enable = "avx512fp16")]
23205	unsafe fn test_mm_roundscale_round_sh() {
23206	let a = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23207	let b = _mm_setr_ph(`1.1`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23208	let r = _mm_roundscale_round_sh::<`0`, _MM_FROUND_NO_EXC>(a, b);
23209	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23210	assert_eq_m128h(r, e);
23211	}
23212
23213	#[simd_test(enable = "avx512fp16")]
23214	unsafe fn test_mm_mask_roundscale_round_sh() {
23215	let a = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23216	let b = _mm_setr_ph(`1.1`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23217	let src = _mm_setr_ph(`3.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23218	let r = _mm_mask_roundscale_round_sh::<`0`, _MM_FROUND_NO_EXC>(src, `0`, a, b);
23219	let e = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23220	assert_eq_m128h(r, e);
23221	let r = _mm_mask_roundscale_round_sh::<`0`, _MM_FROUND_NO_EXC>(src, `1`, a, b);
23222	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23223	assert_eq_m128h(r, e);
23224	}
23225
23226	#[simd_test(enable = "avx512fp16")]
23227	unsafe fn test_mm_maskz_roundscale_round_sh() {
23228	let a = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23229	let b = _mm_setr_ph(`1.1`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23230	let r = _mm_maskz_roundscale_round_sh::<`0`, _MM_FROUND_NO_EXC>(`0`, a, b);
23231	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23232	assert_eq_m128h(r, e);
23233	let r = _mm_maskz_roundscale_round_sh::<`0`, _MM_FROUND_NO_EXC>(`1`, a, b);
23234	let e = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23235	assert_eq_m128h(r, e);
23236	}
23237
23238	#[simd_test(enable = "avx512fp16,avx512vl")]
23239	unsafe fn test_mm_scalef_ph() {
23240	let a = _mm_set1_ph(`1.`);
23241	let b = _mm_set1_ph(`3.`);
23242	let r = _mm_scalef_ph(a, b);
23243	let e = _mm_set1_ph(`8.0`);
23244	assert_eq_m128h(r, e);
23245	}
23246
23247	#[simd_test(enable = "avx512fp16,avx512vl")]
23248	unsafe fn test_mm_mask_scalef_ph() {
23249	let a = _mm_set1_ph(`1.`);
23250	let b = _mm_set1_ph(`3.`);
23251	let src = _mm_set1_ph(`2.`);
23252	let r = _mm_mask_scalef_ph(src, `0b01010101`, a, b);
23253	let e = _mm_set_ph(`2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`);
23254	assert_eq_m128h(r, e);
23255	}
23256
23257	#[simd_test(enable = "avx512fp16,avx512vl")]
23258	unsafe fn test_mm_maskz_scalef_ph() {
23259	let a = _mm_set1_ph(`1.`);
23260	let b = _mm_set1_ph(`3.`);
23261	let r = _mm_maskz_scalef_ph(`0b01010101`, a, b);
23262	let e = _mm_set_ph(`0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`);
23263	assert_eq_m128h(r, e);
23264	}
23265
23266	#[simd_test(enable = "avx512fp16,avx512vl")]
23267	unsafe fn test_mm256_scalef_ph() {
23268	let a = _mm256_set1_ph(`1.`);
23269	let b = _mm256_set1_ph(`3.`);
23270	let r = _mm256_scalef_ph(a, b);
23271	let e = _mm256_set1_ph(`8.0`);
23272	assert_eq_m256h(r, e);
23273	}
23274
23275	#[simd_test(enable = "avx512fp16,avx512vl")]
23276	unsafe fn test_mm256_mask_scalef_ph() {
23277	let a = _mm256_set1_ph(`1.`);
23278	let b = _mm256_set1_ph(`3.`);
23279	let src = _mm256_set1_ph(`2.`);
23280	let r = _mm256_mask_scalef_ph(src, `0b0101010101010101`, a, b);
23281	let e = _mm256_set_ph(
23282	`2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`,
23283	);
23284	assert_eq_m256h(r, e);
23285	}
23286
23287	#[simd_test(enable = "avx512fp16,avx512vl")]
23288	unsafe fn test_mm256_maskz_scalef_ph() {
23289	let a = _mm256_set1_ph(`1.`);
23290	let b = _mm256_set1_ph(`3.`);
23291	let r = _mm256_maskz_scalef_ph(`0b0101010101010101`, a, b);
23292	let e = _mm256_set_ph(
23293	`0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`,
23294	);
23295	assert_eq_m256h(r, e);
23296	}
23297
23298	#[simd_test(enable = "avx512fp16")]
23299	unsafe fn test_mm512_scalef_ph() {
23300	let a = _mm512_set1_ph(`1.`);
23301	let b = _mm512_set1_ph(`3.`);
23302	let r = _mm512_scalef_ph(a, b);
23303	let e = _mm512_set1_ph(`8.0`);
23304	assert_eq_m512h(r, e);
23305	}
23306
23307	#[simd_test(enable = "avx512fp16")]
23308	unsafe fn test_mm512_mask_scalef_ph() {
23309	let a = _mm512_set1_ph(`1.`);
23310	let b = _mm512_set1_ph(`3.`);
23311	let src = _mm512_set1_ph(`2.`);
23312	let r = _mm512_mask_scalef_ph(src, `0b01010101010101010101010101010101`, a, b);
23313	let e = _mm512_set_ph(
23314	`2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`,
23315	`8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`,
23316	);
23317	assert_eq_m512h(r, e);
23318	}
23319
23320	#[simd_test(enable = "avx512fp16")]
23321	unsafe fn test_mm512_maskz_scalef_ph() {
23322	let a = _mm512_set1_ph(`1.`);
23323	let b = _mm512_set1_ph(`3.`);
23324	let r = _mm512_maskz_scalef_ph(`0b01010101010101010101010101010101`, a, b);
23325	let e = _mm512_set_ph(
23326	`0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`,
23327	`8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`,
23328	);
23329	assert_eq_m512h(r, e);
23330	}
23331
23332	#[simd_test(enable = "avx512fp16")]
23333	unsafe fn test_mm512_scalef_round_ph() {
23334	let a = _mm512_set1_ph(`1.`);
23335	let b = _mm512_set1_ph(`3.`);
23336	let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
23337	let e = _mm512_set1_ph(`8.0`);
23338	assert_eq_m512h(r, e);
23339	}
23340
23341	#[simd_test(enable = "avx512fp16")]
23342	unsafe fn test_mm512_mask_scalef_round_ph() {
23343	let a = _mm512_set1_ph(`1.`);
23344	let b = _mm512_set1_ph(`3.`);
23345	let src = _mm512_set1_ph(`2.`);
23346	let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
23347	src,
23348	`0b01010101010101010101010101010101`,
23349	a,
23350	b,
23351	);
23352	let e = _mm512_set_ph(
23353	`2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`,
23354	`8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`, `2.0`, `8.0`,
23355	);
23356	assert_eq_m512h(r, e);
23357	}
23358
23359	#[simd_test(enable = "avx512fp16")]
23360	unsafe fn test_mm512_maskz_scalef_round_ph() {
23361	let a = _mm512_set1_ph(`1.`);
23362	let b = _mm512_set1_ph(`3.`);
23363	let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
23364	`0b01010101010101010101010101010101`,
23365	a,
23366	b,
23367	);
23368	let e = _mm512_set_ph(
23369	`0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`,
23370	`8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`, `0.0`, `8.0`,
23371	);
23372	assert_eq_m512h(r, e);
23373	}
23374
23375	#[simd_test(enable = "avx512fp16")]
23376	unsafe fn test_mm_scalef_sh() {
23377	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23378	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23379	let r = _mm_scalef_sh(a, b);
23380	let e = _mm_setr_ph(`8.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23381	assert_eq_m128h(r, e);
23382	}
23383
23384	#[simd_test(enable = "avx512fp16")]
23385	unsafe fn test_mm_mask_scalef_sh() {
23386	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23387	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23388	let src = _mm_setr_ph(`2.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23389	let r = _mm_mask_scalef_sh(src, `0`, a, b);
23390	let e = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23391	assert_eq_m128h(r, e);
23392	let r = _mm_mask_scalef_sh(src, `1`, a, b);
23393	let e = _mm_setr_ph(`8.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23394	assert_eq_m128h(r, e);
23395	}
23396
23397	#[simd_test(enable = "avx512fp16")]
23398	unsafe fn test_mm_maskz_scalef_sh() {
23399	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23400	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23401	let r = _mm_maskz_scalef_sh(`0`, a, b);
23402	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23403	assert_eq_m128h(r, e);
23404	let r = _mm_maskz_scalef_sh(`1`, a, b);
23405	let e = _mm_setr_ph(`8.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23406	assert_eq_m128h(r, e);
23407	}
23408
23409	#[simd_test(enable = "avx512fp16")]
23410	unsafe fn test_mm_scalef_round_sh() {
23411	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23412	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23413	let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
23414	let e = _mm_setr_ph(`8.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23415	assert_eq_m128h(r, e);
23416	}
23417
23418	#[simd_test(enable = "avx512fp16")]
23419	unsafe fn test_mm_mask_scalef_round_sh() {
23420	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23421	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23422	let src = _mm_setr_ph(`2.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23423	let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
23424	src, `0`, a, b,
23425	);
23426	let e = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23427	assert_eq_m128h(r, e);
23428	let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
23429	src, `1`, a, b,
23430	);
23431	let e = _mm_setr_ph(`8.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23432	assert_eq_m128h(r, e);
23433	}
23434
23435	#[simd_test(enable = "avx512fp16")]
23436	unsafe fn test_mm_maskz_scalef_round_sh() {
23437	let a = _mm_setr_ph(`1.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23438	let b = _mm_setr_ph(`3.0`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23439	let r =
23440	_mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
23441	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23442	assert_eq_m128h(r, e);
23443	let r =
23444	_mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
23445	let e = _mm_setr_ph(`8.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23446	assert_eq_m128h(r, e);
23447	}
23448
23449	#[simd_test(enable = "avx512fp16,avx512vl")]
23450	unsafe fn test_mm_reduce_ph() {
23451	let a = _mm_set1_ph(`1.25`);
23452	let r = _mm_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(a);
23453	let e = _mm_set1_ph(`0.25`);
23454	assert_eq_m128h(r, e);
23455	}
23456
23457	#[simd_test(enable = "avx512fp16,avx512vl")]
23458	unsafe fn test_mm_mask_reduce_ph() {
23459	let a = _mm_set1_ph(`1.25`);
23460	let src = _mm_set1_ph(`2.0`);
23461	let r = _mm_mask_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(src, `0b01010101`, a);
23462	let e = _mm_set_ph(`2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`);
23463	assert_eq_m128h(r, e);
23464	}
23465
23466	#[simd_test(enable = "avx512fp16,avx512vl")]
23467	unsafe fn test_mm_maskz_reduce_ph() {
23468	let a = _mm_set1_ph(`1.25`);
23469	let r = _mm_maskz_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(`0b01010101`, a);
23470	let e = _mm_set_ph(`0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`);
23471	assert_eq_m128h(r, e);
23472	}
23473
23474	#[simd_test(enable = "avx512fp16,avx512vl")]
23475	unsafe fn test_mm256_reduce_ph() {
23476	let a = _mm256_set1_ph(`1.25`);
23477	let r = _mm256_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(a);
23478	let e = _mm256_set1_ph(`0.25`);
23479	assert_eq_m256h(r, e);
23480	}
23481
23482	#[simd_test(enable = "avx512fp16,avx512vl")]
23483	unsafe fn test_mm256_mask_reduce_ph() {
23484	let a = _mm256_set1_ph(`1.25`);
23485	let src = _mm256_set1_ph(`2.0`);
23486	let r = _mm256_mask_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(src, `0b0101010101010101`, a);
23487	let e = _mm256_set_ph(
23488	`2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`,
23489	);
23490	assert_eq_m256h(r, e);
23491	}
23492
23493	#[simd_test(enable = "avx512fp16,avx512vl")]
23494	unsafe fn test_mm256_maskz_reduce_ph() {
23495	let a = _mm256_set1_ph(`1.25`);
23496	let r = _mm256_maskz_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(`0b0101010101010101`, a);
23497	let e = _mm256_set_ph(
23498	`0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`,
23499	);
23500	assert_eq_m256h(r, e);
23501	}
23502
23503	#[simd_test(enable = "avx512fp16")]
23504	unsafe fn test_mm512_reduce_ph() {
23505	let a = _mm512_set1_ph(`1.25`);
23506	let r = _mm512_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(a);
23507	let e = _mm512_set1_ph(`0.25`);
23508	assert_eq_m512h(r, e);
23509	}
23510
23511	#[simd_test(enable = "avx512fp16")]
23512	unsafe fn test_mm512_mask_reduce_ph() {
23513	let a = _mm512_set1_ph(`1.25`);
23514	let src = _mm512_set1_ph(`2.0`);
23515	let r = _mm512_mask_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(
23516	src,
23517	`0b01010101010101010101010101010101`,
23518	a,
23519	);
23520	let e = _mm512_set_ph(
23521	`2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`,
23522	`2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`,
23523	);
23524	assert_eq_m512h(r, e);
23525	}
23526
23527	#[simd_test(enable = "avx512fp16")]
23528	unsafe fn test_mm512_maskz_reduce_ph() {
23529	let a = _mm512_set1_ph(`1.25`);
23530	let r = _mm512_maskz_reduce_ph::<{ `16` \| _MM_FROUND_TO_ZERO }>(
23531	`0b01010101010101010101010101010101`,
23532	a,
23533	);
23534	let e = _mm512_set_ph(
23535	`0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`,
23536	`0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`,
23537	);
23538	assert_eq_m512h(r, e);
23539	}
23540
23541	#[simd_test(enable = "avx512fp16")]
23542	unsafe fn test_mm512_reduce_round_ph() {
23543	let a = _mm512_set1_ph(`1.25`);
23544	let r = _mm512_reduce_round_ph::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
23545	let e = _mm512_set1_ph(`0.25`);
23546	assert_eq_m512h(r, e);
23547	}
23548
23549	#[simd_test(enable = "avx512fp16")]
23550	unsafe fn test_mm512_mask_reduce_round_ph() {
23551	let a = _mm512_set1_ph(`1.25`);
23552	let src = _mm512_set1_ph(`2.0`);
23553	let r = _mm512_mask_reduce_round_ph::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23554	src,
23555	`0b01010101010101010101010101010101`,
23556	a,
23557	);
23558	let e = _mm512_set_ph(
23559	`2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`,
23560	`2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`, `2.0`, `0.25`,
23561	);
23562	assert_eq_m512h(r, e);
23563	}
23564
23565	#[simd_test(enable = "avx512fp16")]
23566	unsafe fn test_mm512_maskz_reduce_round_ph() {
23567	let a = _mm512_set1_ph(`1.25`);
23568	let r = _mm512_maskz_reduce_round_ph::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23569	`0b01010101010101010101010101010101`,
23570	a,
23571	);
23572	let e = _mm512_set_ph(
23573	`0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`,
23574	`0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`, `0.0`, `0.25`,
23575	);
23576	assert_eq_m512h(r, e);
23577	}
23578
23579	#[simd_test(enable = "avx512fp16")]
23580	unsafe fn test_mm_reduce_sh() {
23581	let a = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23582	let b = _mm_setr_ph(`1.25`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23583	let r = _mm_reduce_sh::<{ `16` \| _MM_FROUND_TO_ZERO }>(a, b);
23584	let e = _mm_setr_ph(`0.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23585	assert_eq_m128h(r, e);
23586	}
23587
23588	#[simd_test(enable = "avx512fp16")]
23589	unsafe fn test_mm_mask_reduce_sh() {
23590	let a = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23591	let b = _mm_setr_ph(`1.25`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23592	let src = _mm_setr_ph(`2.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23593	let r = _mm_mask_reduce_sh::<{ `16` \| _MM_FROUND_TO_ZERO }>(src, `0`, a, b);
23594	let e = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23595	assert_eq_m128h(r, e);
23596	let r = _mm_mask_reduce_sh::<{ `16` \| _MM_FROUND_TO_ZERO }>(src, `1`, a, b);
23597	let e = _mm_setr_ph(`0.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23598	assert_eq_m128h(r, e);
23599	}
23600
23601	#[simd_test(enable = "avx512fp16")]
23602	unsafe fn test_mm_maskz_reduce_sh() {
23603	let a = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23604	let b = _mm_setr_ph(`1.25`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23605	let r = _mm_maskz_reduce_sh::<{ `16` \| _MM_FROUND_TO_ZERO }>(`0`, a, b);
23606	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23607	assert_eq_m128h(r, e);
23608	let r = _mm_maskz_reduce_sh::<{ `16` \| _MM_FROUND_TO_ZERO }>(`1`, a, b);
23609	let e = _mm_setr_ph(`0.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23610	assert_eq_m128h(r, e);
23611	}
23612
23613	#[simd_test(enable = "avx512fp16")]
23614	unsafe fn test_mm_reduce_round_sh() {
23615	let a = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23616	let b = _mm_setr_ph(`1.25`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23617	let r = _mm_reduce_round_sh::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
23618	let e = _mm_setr_ph(`0.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23619	assert_eq_m128h(r, e);
23620	}
23621
23622	#[simd_test(enable = "avx512fp16")]
23623	unsafe fn test_mm_mask_reduce_round_sh() {
23624	let a = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23625	let b = _mm_setr_ph(`1.25`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23626	let src = _mm_setr_ph(`2.0`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`);
23627	let r = _mm_mask_reduce_round_sh::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23628	src, `0`, a, b,
23629	);
23630	let e = _mm_setr_ph(`2.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23631	assert_eq_m128h(r, e);
23632	let r = _mm_mask_reduce_round_sh::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23633	src, `1`, a, b,
23634	);
23635	let e = _mm_setr_ph(`0.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23636	assert_eq_m128h(r, e);
23637	}
23638
23639	#[simd_test(enable = "avx512fp16")]
23640	unsafe fn test_mm_maskz_reduce_round_sh() {
23641	let a = _mm_setr_ph(`3.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23642	let b = _mm_setr_ph(`1.25`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`);
23643	let r =
23644	_mm_maskz_reduce_round_sh::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(`0`, a, b);
23645	let e = _mm_setr_ph(`0.0`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23646	assert_eq_m128h(r, e);
23647	let r =
23648	_mm_maskz_reduce_round_sh::<{ `16` \| _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(`1`, a, b);
23649	let e = _mm_setr_ph(`0.25`, `10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`);
23650	assert_eq_m128h(r, e);
23651	}
23652
23653	#[simd_test(enable = "avx512fp16,avx512vl")]
23654	unsafe fn test_mm_reduce_add_ph() {
23655	let a = _mm_set1_ph(`2.0`);
23656	let r = _mm_reduce_add_ph(a);
23657	assert_eq!(r, `16.0`);
23658	}
23659
23660	#[simd_test(enable = "avx512fp16,avx512vl")]
23661	unsafe fn test_mm256_reduce_add_ph() {
23662	let a = _mm256_set1_ph(`2.0`);
23663	let r = _mm256_reduce_add_ph(a);
23664	assert_eq!(r, `32.0`);
23665	}
23666
23667	#[simd_test(enable = "avx512fp16")]
23668	unsafe fn test_mm512_reduce_add_ph() {
23669	let a = _mm512_set1_ph(`2.0`);
23670	let r = _mm512_reduce_add_ph(a);
23671	assert_eq!(r, `64.0`);
23672	}
23673
23674	#[simd_test(enable = "avx512fp16,avx512vl")]
23675	unsafe fn test_mm_reduce_mul_ph() {
23676	let a = _mm_set1_ph(`2.0`);
23677	let r = _mm_reduce_mul_ph(a);
23678	assert_eq!(r, `256.0`);
23679	}
23680
23681	#[simd_test(enable = "avx512fp16,avx512vl")]
23682	unsafe fn test_mm256_reduce_mul_ph() {
23683	let a = _mm256_set1_ph(`2.0`);
23684	let r = _mm256_reduce_mul_ph(a);
23685	assert_eq!(r, `65536.0`);
23686	}
23687
23688	#[simd_test(enable = "avx512fp16")]
23689	unsafe fn test_mm512_reduce_mul_ph() {
23690	let a = _mm512_set1_ph(`2.0`);
23691	let r = _mm512_reduce_mul_ph(a);
23692	assert_eq!(r, `16777216.0`);
23693	}
23694
23695	#[simd_test(enable = "avx512fp16,avx512vl")]
23696	unsafe fn test_mm_reduce_max_ph() {
23697	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
23698	let r = _mm_reduce_max_ph(a);
23699	assert_eq!(r, `8.0`);
23700	}
23701
23702	#[simd_test(enable = "avx512fp16,avx512vl")]
23703	unsafe fn test_mm256_reduce_max_ph() {
23704	let a = _mm256_set_ph(
23705	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23706	);
23707	let r = _mm256_reduce_max_ph(a);
23708	assert_eq!(r, `16.0`);
23709	}
23710
23711	#[simd_test(enable = "avx512fp16")]
23712	unsafe fn test_mm512_reduce_max_ph() {
23713	let a = _mm512_set_ph(
23714	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23715	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
23716	`31.0`, `32.0`,
23717	);
23718	let r = _mm512_reduce_max_ph(a);
23719	assert_eq!(r, `32.0`);
23720	}
23721
23722	#[simd_test(enable = "avx512fp16,avx512vl")]
23723	unsafe fn test_mm_reduce_min_ph() {
23724	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
23725	let r = _mm_reduce_min_ph(a);
23726	assert_eq!(r, `1.0`);
23727	}
23728
23729	#[simd_test(enable = "avx512fp16,avx512vl")]
23730	unsafe fn test_mm256_reduce_min_ph() {
23731	let a = _mm256_set_ph(
23732	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23733	);
23734	let r = _mm256_reduce_min_ph(a);
23735	assert_eq!(r, `1.0`);
23736	}
23737
23738	#[simd_test(enable = "avx512fp16")]
23739	unsafe fn test_mm512_reduce_min_ph() {
23740	let a = _mm512_set_ph(
23741	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23742	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
23743	`31.0`, `32.0`,
23744	);
23745	let r = _mm512_reduce_min_ph(a);
23746	assert_eq!(r, `1.0`);
23747	}
23748
23749	#[simd_test(enable = "avx512fp16,avx512vl")]
23750	unsafe fn test_mm_fpclass_ph_mask() {
23751	let a = _mm_set_ph(
23752	`1.`,
23753	f16::INFINITY,
23754	f16::NEG_INFINITY,
23755	`0.0`,
23756	`-0.0`,
23757	`-2.0`,
23758	f16::NAN,
23759	`5.9e-8`, // Denormal
23760	);
23761	let r = _mm_fpclass_ph_mask::<`0x18`>(a); // infinities
23762	assert_eq!(r, `0b01100000`);
23763	}
23764
23765	#[simd_test(enable = "avx512fp16,avx512vl")]
23766	unsafe fn test_mm_mask_fpclass_ph_mask() {
23767	let a = _mm_set_ph(
23768	`1.`,
23769	f16::INFINITY,
23770	f16::NEG_INFINITY,
23771	`0.0`,
23772	`-0.0`,
23773	`-2.0`,
23774	f16::NAN,
23775	`5.9e-8`, // Denormal
23776	);
23777	let r = _mm_mask_fpclass_ph_mask::<`0x18`>(`0b01010101`, a);
23778	assert_eq!(r, `0b01000000`);
23779	}
23780
23781	#[simd_test(enable = "avx512fp16,avx512vl")]
23782	unsafe fn test_mm256_fpclass_ph_mask() {
23783	let a = _mm256_set_ph(
23784	`1.`,
23785	f16::INFINITY,
23786	f16::NEG_INFINITY,
23787	`0.0`,
23788	`-0.0`,
23789	`-2.0`,
23790	f16::NAN,
23791	`5.9e-8`, // Denormal
23792	`1.`,
23793	f16::INFINITY,
23794	f16::NEG_INFINITY,
23795	`0.0`,
23796	`-0.0`,
23797	`-2.0`,
23798	f16::NAN,
23799	`5.9e-8`, // Denormal
23800	);
23801	let r = _mm256_fpclass_ph_mask::<`0x18`>(a); // infinities
23802	assert_eq!(r, `0b0110000001100000`);
23803	}
23804
23805	#[simd_test(enable = "avx512fp16,avx512vl")]
23806	unsafe fn test_mm256_mask_fpclass_ph_mask() {
23807	let a = _mm256_set_ph(
23808	`1.`,
23809	f16::INFINITY,
23810	f16::NEG_INFINITY,
23811	`0.0`,
23812	`-0.0`,
23813	`-2.0`,
23814	f16::NAN,
23815	`5.9e-8`, // Denormal
23816	`1.`,
23817	f16::INFINITY,
23818	f16::NEG_INFINITY,
23819	`0.0`,
23820	`-0.0`,
23821	`-2.0`,
23822	f16::NAN,
23823	`5.9e-8`, // Denormal
23824	);
23825	let r = _mm256_mask_fpclass_ph_mask::<`0x18`>(`0b0101010101010101`, a);
23826	assert_eq!(r, `0b0100000001000000`);
23827	}
23828
23829	#[simd_test(enable = "avx512fp16")]
23830	unsafe fn test_mm512_fpclass_ph_mask() {
23831	let a = _mm512_set_ph(
23832	`1.`,
23833	f16::INFINITY,
23834	f16::NEG_INFINITY,
23835	`0.0`,
23836	`-0.0`,
23837	`-2.0`,
23838	f16::NAN,
23839	`5.9e-8`, // Denormal
23840	`1.`,
23841	f16::INFINITY,
23842	f16::NEG_INFINITY,
23843	`0.0`,
23844	`-0.0`,
23845	`-2.0`,
23846	f16::NAN,
23847	`5.9e-8`, // Denormal
23848	`1.`,
23849	f16::INFINITY,
23850	f16::NEG_INFINITY,
23851	`0.0`,
23852	`-0.0`,
23853	`-2.0`,
23854	f16::NAN,
23855	`5.9e-8`, // Denormal
23856	`1.`,
23857	f16::INFINITY,
23858	f16::NEG_INFINITY,
23859	`0.0`,
23860	`-0.0`,
23861	`-2.0`,
23862	f16::NAN,
23863	`5.9e-8`, // Denormal
23864	);
23865	let r = _mm512_fpclass_ph_mask::<`0x18`>(a); // infinities
23866	assert_eq!(r, `0b01100000011000000110000001100000`);
23867	}
23868
23869	#[simd_test(enable = "avx512fp16")]
23870	unsafe fn test_mm512_mask_fpclass_ph_mask() {
23871	let a = _mm512_set_ph(
23872	`1.`,
23873	f16::INFINITY,
23874	f16::NEG_INFINITY,
23875	`0.0`,
23876	`-0.0`,
23877	`-2.0`,
23878	f16::NAN,
23879	`5.9e-8`, // Denormal
23880	`1.`,
23881	f16::INFINITY,
23882	f16::NEG_INFINITY,
23883	`0.0`,
23884	`-0.0`,
23885	`-2.0`,
23886	f16::NAN,
23887	`5.9e-8`, // Denormal
23888	`1.`,
23889	f16::INFINITY,
23890	f16::NEG_INFINITY,
23891	`0.0`,
23892	`-0.0`,
23893	`-2.0`,
23894	f16::NAN,
23895	`5.9e-8`, // Denormal
23896	`1.`,
23897	f16::INFINITY,
23898	f16::NEG_INFINITY,
23899	`0.0`,
23900	`-0.0`,
23901	`-2.0`,
23902	f16::NAN,
23903	`5.9e-8`, // Denormal
23904	);
23905	let r = _mm512_mask_fpclass_ph_mask::<`0x18`>(`0b01010101010101010101010101010101`, a);
23906	assert_eq!(r, `0b01000000010000000100000001000000`);
23907	}
23908
23909	#[simd_test(enable = "avx512fp16")]
23910	unsafe fn test_mm_fpclass_sh_mask() {
23911	let a = _mm_set_sh(f16::INFINITY);
23912	let r = _mm_fpclass_sh_mask::<`0x18`>(a);
23913	assert_eq!(r, `1`);
23914	}
23915
23916	#[simd_test(enable = "avx512fp16")]
23917	unsafe fn test_mm_mask_fpclass_sh_mask() {
23918	let a = _mm_set_sh(f16::INFINITY);
23919	let r = _mm_mask_fpclass_sh_mask::<`0x18`>(`0`, a);
23920	assert_eq!(r, `0`);
23921	let r = _mm_mask_fpclass_sh_mask::<`0x18`>(`1`, a);
23922	assert_eq!(r, `1`);
23923	}
23924
23925	#[simd_test(enable = "avx512fp16,avx512vl")]
23926	unsafe fn test_mm_mask_blend_ph() {
23927	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
23928	let b = _mm_set_ph(`-1.0`, `-2.0`, `-3.0`, `-4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`);
23929	let r = _mm_mask_blend_ph(`0b01010101`, a, b);
23930	let e = _mm_set_ph(`1.0`, `-2.0`, `3.0`, `-4.0`, `5.0`, `-6.0`, `7.0`, `-8.0`);
23931	assert_eq_m128h(r, e);
23932	}
23933
23934	#[simd_test(enable = "avx512fp16,avx512vl")]
23935	unsafe fn test_mm256_mask_blend_ph() {
23936	let a = _mm256_set_ph(
23937	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23938	);
23939	let b = _mm256_set_ph(
23940	`-1.0`, `-2.0`, `-3.0`, `-4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `-9.0`, `-10.0`, `-11.0`, `-12.0`, `-13.0`,
23941	`-14.0`, `-15.0`, `-16.0`,
23942	);
23943	let r = _mm256_mask_blend_ph(`0b0101010101010101`, a, b);
23944	let e = _mm256_set_ph(
23945	`1.0`, `-2.0`, `3.0`, `-4.0`, `5.0`, `-6.0`, `7.0`, `-8.0`, `9.0`, `-10.0`, `11.0`, `-12.0`, `13.0`, `-14.0`, `15.0`,
23946	`-16.0`,
23947	);
23948	assert_eq_m256h(r, e);
23949	}
23950
23951	#[simd_test(enable = "avx512fp16")]
23952	unsafe fn test_mm512_mask_blend_ph() {
23953	let a = _mm512_set_ph(
23954	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23955	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
23956	`31.0`, `32.0`,
23957	);
23958	let b = _mm512_set_ph(
23959	`-1.0`, `-2.0`, `-3.0`, `-4.0`, `-5.0`, `-6.0`, `-7.0`, `-8.0`, `-9.0`, `-10.0`, `-11.0`, `-12.0`, `-13.0`,
23960	`-14.0`, `-15.0`, `-16.0`, `-17.0`, `-18.0`, `-19.0`, `-20.0`, `-21.0`, `-22.0`, `-23.0`, `-24.0`, `-25.0`,
23961	`-26.0`, `-27.0`, `-28.0`, `-29.0`, `-30.0`, `-31.0`, `-32.0`,
23962	);
23963	let r = _mm512_mask_blend_ph(`0b01010101010101010101010101010101`, a, b);
23964	let e = _mm512_set_ph(
23965	`1.0`, `-2.0`, `3.0`, `-4.0`, `5.0`, `-6.0`, `7.0`, `-8.0`, `9.0`, `-10.0`, `11.0`, `-12.0`, `13.0`, `-14.0`, `15.0`,
23966	`-16.0`, `17.0`, `-18.0`, `19.0`, `-20.0`, `21.0`, `-22.0`, `23.0`, `-24.0`, `25.0`, `-26.0`, `27.0`, `-28.0`,
23967	`29.0`, `-30.0`, `31.0`, `-32.0`,
23968	);
23969	assert_eq_m512h(r, e);
23970	}
23971
23972	#[simd_test(enable = "avx512fp16,avx512vl")]
23973	unsafe fn test_mm_permutex2var_ph() {
23974	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
23975	let b = _mm_setr_ph(`9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
23976	let idx = _mm_setr_epi16(`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`);
23977	let r = _mm_permutex2var_ph(a, idx, b);
23978	let e = _mm_setr_ph(`1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`);
23979	assert_eq_m128h(r, e);
23980	}
23981
23982	#[simd_test(enable = "avx512fp16,avx512vl")]
23983	unsafe fn test_mm256_permutex2var_ph() {
23984	let a = _mm256_setr_ph(
23985	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
23986	);
23987	let b = _mm256_setr_ph(
23988	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
23989	`31.0`, `32.0`,
23990	);
23991	let idx = _mm256_setr_epi16(`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`, `16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`);
23992	let r = _mm256_permutex2var_ph(a, idx, b);
23993	let e = _mm256_setr_ph(
23994	`1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`, `17.0`, `19.0`, `21.0`, `23.0`, `25.0`, `27.0`, `29.0`,
23995	`31.0`,
23996	);
23997	assert_eq_m256h(r, e);
23998	}
23999
24000	#[simd_test(enable = "avx512fp16")]
24001	unsafe fn test_mm512_permutex2var_ph() {
24002	let a = _mm512_setr_ph(
24003	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24004	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
24005	`31.0`, `32.0`,
24006	);
24007	let b = _mm512_setr_ph(
24008	`33.0`, `34.0`, `35.0`, `36.0`, `37.0`, `38.0`, `39.0`, `40.0`, `41.0`, `42.0`, `43.0`, `44.0`, `45.0`, `46.0`,
24009	`47.0`, `48.0`, `49.0`, `50.0`, `51.0`, `52.0`, `53.0`, `54.0`, `55.0`, `56.0`, `57.0`, `58.0`, `59.0`, `60.0`,
24010	`61.0`, `62.0`, `63.0`, `64.0`,
24011	);
24012	let idx = _mm512_set_epi16(
24013	`62`, `60`, `58`, `56`, `54`, `52`, `50`, `48`, `46`, `44`, `42`, `40`, `38`, `36`, `34`, `32`, `30`, `28`, `26`, `24`, `22`, `20`,
24014	`18`, `16`, `14`, `12`, `10`, `8`, `6`, `4`, `2`, `0`,
24015	);
24016	let r = _mm512_permutex2var_ph(a, idx, b);
24017	let e = _mm512_setr_ph(
24018	`1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`, `17.0`, `19.0`, `21.0`, `23.0`, `25.0`, `27.0`, `29.0`,
24019	`31.0`, `33.0`, `35.0`, `37.0`, `39.0`, `41.0`, `43.0`, `45.0`, `47.0`, `49.0`, `51.0`, `53.0`, `55.0`, `57.0`,
24020	`59.0`, `61.0`, `63.0`,
24021	);
24022	assert_eq_m512h(r, e);
24023	}
24024
24025	#[simd_test(enable = "avx512fp16,avx512vl")]
24026	unsafe fn test_mm_permutexvar_ph() {
24027	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24028	let idx = _mm_set_epi16(`0`, `2`, `4`, `6`, `1`, `3`, `5`, `7`);
24029	let r = _mm_permutexvar_ph(idx, a);
24030	let e = _mm_setr_ph(`1.0`, `3.0`, `5.0`, `7.0`, `2.0`, `4.0`, `6.0`, `8.0`);
24031	assert_eq_m128h(r, e);
24032	}
24033
24034	#[simd_test(enable = "avx512fp16,avx512vl")]
24035	unsafe fn test_mm256_permutexvar_ph() {
24036	let a = _mm256_set_ph(
24037	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24038	);
24039	let idx = _mm256_set_epi16(`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`, `1`, `3`, `5`, `7`, `9`, `11`, `13`, `15`);
24040	let r = _mm256_permutexvar_ph(idx, a);
24041	let e = _mm256_setr_ph(
24042	`1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`, `2.0`, `4.0`, `6.0`, `8.0`, `10.0`, `12.0`, `14.0`, `16.0`,
24043	);
24044	assert_eq_m256h(r, e);
24045	}
24046
24047	#[simd_test(enable = "avx512fp16")]
24048	unsafe fn test_mm512_permutexvar_ph() {
24049	let a = _mm512_set_ph(
24050	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24051	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
24052	`31.0`, `32.0`,
24053	);
24054	let idx = _mm512_set_epi16(
24055	`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`, `16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`, `1`, `3`, `5`, `7`, `9`, `11`, `13`, `15`,
24056	`17`, `19`, `21`, `23`, `25`, `27`, `29`, `31`,
24057	);
24058	let r = _mm512_permutexvar_ph(idx, a);
24059	let e = _mm512_setr_ph(
24060	`1.0`, `3.0`, `5.0`, `7.0`, `9.0`, `11.0`, `13.0`, `15.0`, `17.0`, `19.0`, `21.0`, `23.0`, `25.0`, `27.0`, `29.0`,
24061	`31.0`, `2.0`, `4.0`, `6.0`, `8.0`, `10.0`, `12.0`, `14.0`, `16.0`, `18.0`, `20.0`, `22.0`, `24.0`, `26.0`, `28.0`,
24062	`30.0`, `32.0`,
24063	);
24064	assert_eq_m512h(r, e);
24065	}
24066
24067	#[simd_test(enable = "avx512fp16,avx512vl")]
24068	unsafe fn test_mm_cvtepi16_ph() {
24069	let a = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24070	let r = _mm_cvtepi16_ph(a);
24071	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24072	assert_eq_m128h(r, e);
24073	}
24074
24075	#[simd_test(enable = "avx512fp16,avx512vl")]
24076	unsafe fn test_mm_mask_cvtepi16_ph() {
24077	let a = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24078	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24079	let r = _mm_mask_cvtepi16_ph(src, `0b01010101`, a);
24080	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24081	assert_eq_m128h(r, e);
24082	}
24083
24084	#[simd_test(enable = "avx512fp16,avx512vl")]
24085	unsafe fn test_mm_maskz_cvtepi16_ph() {
24086	let a = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24087	let r = _mm_maskz_cvtepi16_ph(`0b01010101`, a);
24088	let e = _mm_set_ph(`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`);
24089	assert_eq_m128h(r, e);
24090	}
24091
24092	#[simd_test(enable = "avx512fp16,avx512vl")]
24093	unsafe fn test_mm256_cvtepi16_ph() {
24094	let a = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24095	let r = _mm256_cvtepi16_ph(a);
24096	let e = _mm256_set_ph(
24097	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24098	);
24099	assert_eq_m256h(r, e);
24100	}
24101
24102	#[simd_test(enable = "avx512fp16,avx512vl")]
24103	unsafe fn test_mm256_mask_cvtepi16_ph() {
24104	let a = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24105	let src = _mm256_set_ph(
24106	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24107	);
24108	let r = _mm256_mask_cvtepi16_ph(src, `0b0101010101010101`, a);
24109	let e = _mm256_set_ph(
24110	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`,
24111	);
24112	assert_eq_m256h(r, e);
24113	}
24114
24115	#[simd_test(enable = "avx512fp16,avx512vl")]
24116	unsafe fn test_mm256_maskz_cvtepi16_ph() {
24117	let a = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24118	let r = _mm256_maskz_cvtepi16_ph(`0b0101010101010101`, a);
24119	let e = _mm256_set_ph(
24120	`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`, `0.`, `10.`, `0.`, `12.`, `0.`, `14.`, `0.`, `16.`,
24121	);
24122	assert_eq_m256h(r, e);
24123	}
24124
24125	#[simd_test(enable = "avx512fp16")]
24126	unsafe fn test_mm512_cvtepi16_ph() {
24127	let a = _mm512_set_epi16(
24128	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24129	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24130	);
24131	let r = _mm512_cvtepi16_ph(a);
24132	let e = _mm512_set_ph(
24133	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24134	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
24135	`31.0`, `32.0`,
24136	);
24137	assert_eq_m512h(r, e);
24138	}
24139
24140	#[simd_test(enable = "avx512fp16")]
24141	unsafe fn test_mm512_mask_cvtepi16_ph() {
24142	let a = _mm512_set_epi16(
24143	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24144	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24145	);
24146	let src = _mm512_set_ph(
24147	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`,
24148	`27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`,
24149	);
24150	let r = _mm512_mask_cvtepi16_ph(src, `0b01010101010101010101010101010101`, a);
24151	let e = _mm512_set_ph(
24152	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`, `26.`, `18.`,
24153	`28.`, `20.`, `30.`, `22.`, `32.`, `24.`, `34.`, `26.`, `36.`, `28.`, `38.`, `30.`, `40.`, `32.`,
24154	);
24155	assert_eq_m512h(r, e);
24156	}
24157
24158	#[simd_test(enable = "avx512fp16")]
24159	unsafe fn test_mm512_maskz_cvtepi16_ph() {
24160	let a = _mm512_set_epi16(
24161	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24162	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24163	);
24164	let r = _mm512_maskz_cvtepi16_ph(`0b01010101010101010101010101010101`, a);
24165	let e = _mm512_set_ph(
24166	`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`, `0.`, `10.`, `0.`, `12.`, `0.`, `14.`, `0.`, `16.`, `0.`, `18.`, `0.`, `20.`,
24167	`0.`, `22.`, `0.`, `24.`, `0.`, `26.`, `0.`, `28.`, `0.`, `30.`, `0.`, `32.`,
24168	);
24169	assert_eq_m512h(r, e);
24170	}
24171
24172	#[simd_test(enable = "avx512fp16")]
24173	unsafe fn test_mm512_cvt_roundepi16_ph() {
24174	let a = _mm512_set_epi16(
24175	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24176	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24177	);
24178	let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24179	let e = _mm512_set_ph(
24180	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24181	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
24182	`31.0`, `32.0`,
24183	);
24184	assert_eq_m512h(r, e);
24185	}
24186
24187	#[simd_test(enable = "avx512fp16")]
24188	unsafe fn test_mm512_mask_cvt_roundepi16_ph() {
24189	let a = _mm512_set_epi16(
24190	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24191	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24192	);
24193	let src = _mm512_set_ph(
24194	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`,
24195	`27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`,
24196	);
24197	let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24198	src,
24199	`0b01010101010101010101010101010101`,
24200	a,
24201	);
24202	let e = _mm512_set_ph(
24203	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`, `26.`, `18.`,
24204	`28.`, `20.`, `30.`, `22.`, `32.`, `24.`, `34.`, `26.`, `36.`, `28.`, `38.`, `30.`, `40.`, `32.`,
24205	);
24206	assert_eq_m512h(r, e);
24207	}
24208
24209	#[simd_test(enable = "avx512fp16")]
24210	unsafe fn test_mm512_maskz_cvt_roundepi16_ph() {
24211	let a = _mm512_set_epi16(
24212	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24213	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24214	);
24215	let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24216	`0b01010101010101010101010101010101`,
24217	a,
24218	);
24219	let e = _mm512_set_ph(
24220	`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`, `0.`, `10.`, `0.`, `12.`, `0.`, `14.`, `0.`, `16.`, `0.`, `18.`, `0.`, `20.`,
24221	`0.`, `22.`, `0.`, `24.`, `0.`, `26.`, `0.`, `28.`, `0.`, `30.`, `0.`, `32.`,
24222	);
24223	assert_eq_m512h(r, e);
24224	}
24225
24226	#[simd_test(enable = "avx512fp16,avx512vl")]
24227	unsafe fn test_mm_cvtepu16_ph() {
24228	let a = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24229	let r = _mm_cvtepu16_ph(a);
24230	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24231	assert_eq_m128h(r, e);
24232	}
24233
24234	#[simd_test(enable = "avx512fp16,avx512vl")]
24235	unsafe fn test_mm_mask_cvtepu16_ph() {
24236	let a = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24237	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24238	let r = _mm_mask_cvtepu16_ph(src, `0b01010101`, a);
24239	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24240	assert_eq_m128h(r, e);
24241	}
24242
24243	#[simd_test(enable = "avx512fp16,avx512vl")]
24244	unsafe fn test_mm_maskz_cvtepu16_ph() {
24245	let a = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24246	let r = _mm_maskz_cvtepu16_ph(`0b01010101`, a);
24247	let e = _mm_set_ph(`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`);
24248	assert_eq_m128h(r, e);
24249	}
24250
24251	#[simd_test(enable = "avx512fp16,avx512vl")]
24252	unsafe fn test_mm256_cvtepu16_ph() {
24253	let a = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24254	let r = _mm256_cvtepu16_ph(a);
24255	let e = _mm256_set_ph(
24256	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24257	);
24258	assert_eq_m256h(r, e);
24259	}
24260
24261	#[simd_test(enable = "avx512fp16,avx512vl")]
24262	unsafe fn test_mm256_mask_cvtepu16_ph() {
24263	let a = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24264	let src = _mm256_set_ph(
24265	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24266	);
24267	let r = _mm256_mask_cvtepu16_ph(src, `0b0101010101010101`, a);
24268	let e = _mm256_set_ph(
24269	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`,
24270	);
24271	assert_eq_m256h(r, e);
24272	}
24273
24274	#[simd_test(enable = "avx512fp16,avx512vl")]
24275	unsafe fn test_mm256_maskz_cvtepu16_ph() {
24276	let a = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24277	let r = _mm256_maskz_cvtepu16_ph(`0b0101010101010101`, a);
24278	let e = _mm256_set_ph(
24279	`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`, `0.`, `10.`, `0.`, `12.`, `0.`, `14.`, `0.`, `16.`,
24280	);
24281	assert_eq_m256h(r, e);
24282	}
24283
24284	#[simd_test(enable = "avx512fp16")]
24285	unsafe fn test_mm512_cvtepu16_ph() {
24286	let a = _mm512_set_epi16(
24287	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24288	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24289	);
24290	let r = _mm512_cvtepu16_ph(a);
24291	let e = _mm512_set_ph(
24292	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24293	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
24294	`31.0`, `32.0`,
24295	);
24296	assert_eq_m512h(r, e);
24297	}
24298
24299	#[simd_test(enable = "avx512fp16")]
24300	unsafe fn test_mm512_mask_cvtepu16_ph() {
24301	let a = _mm512_set_epi16(
24302	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24303	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24304	);
24305	let src = _mm512_set_ph(
24306	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`,
24307	`27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`,
24308	);
24309	let r = _mm512_mask_cvtepu16_ph(src, `0b01010101010101010101010101010101`, a);
24310	let e = _mm512_set_ph(
24311	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`, `26.`, `18.`,
24312	`28.`, `20.`, `30.`, `22.`, `32.`, `24.`, `34.`, `26.`, `36.`, `28.`, `38.`, `30.`, `40.`, `32.`,
24313	);
24314	assert_eq_m512h(r, e);
24315	}
24316
24317	#[simd_test(enable = "avx512fp16")]
24318	unsafe fn test_mm512_maskz_cvtepu16_ph() {
24319	let a = _mm512_set_epi16(
24320	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24321	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24322	);
24323	let r = _mm512_maskz_cvtepu16_ph(`0b01010101010101010101010101010101`, a);
24324	let e = _mm512_set_ph(
24325	`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`, `0.`, `10.`, `0.`, `12.`, `0.`, `14.`, `0.`, `16.`, `0.`, `18.`, `0.`, `20.`,
24326	`0.`, `22.`, `0.`, `24.`, `0.`, `26.`, `0.`, `28.`, `0.`, `30.`, `0.`, `32.`,
24327	);
24328	assert_eq_m512h(r, e);
24329	}
24330
24331	#[simd_test(enable = "avx512fp16")]
24332	unsafe fn test_mm512_cvt_roundepu16_ph() {
24333	let a = _mm512_set_epi16(
24334	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24335	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24336	);
24337	let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24338	let e = _mm512_set_ph(
24339	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24340	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
24341	`31.0`, `32.0`,
24342	);
24343	assert_eq_m512h(r, e);
24344	}
24345
24346	#[simd_test(enable = "avx512fp16")]
24347	unsafe fn test_mm512_mask_cvt_roundepu16_ph() {
24348	let a = _mm512_set_epi16(
24349	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24350	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24351	);
24352	let src = _mm512_set_ph(
24353	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`,
24354	`27.`, `28.`, `29.`, `30.`, `31.`, `32.`, `33.`, `34.`, `35.`, `36.`, `37.`, `38.`, `39.`, `40.`, `41.`,
24355	);
24356	let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24357	src,
24358	`0b01010101010101010101010101010101`,
24359	a,
24360	);
24361	let e = _mm512_set_ph(
24362	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`, `26.`, `18.`,
24363	`28.`, `20.`, `30.`, `22.`, `32.`, `24.`, `34.`, `26.`, `36.`, `28.`, `38.`, `30.`, `40.`, `32.`,
24364	);
24365	assert_eq_m512h(r, e);
24366	}
24367
24368	#[simd_test(enable = "avx512fp16")]
24369	unsafe fn test_mm512_maskz_cvt_roundepu16_ph() {
24370	let a = _mm512_set_epi16(
24371	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
24372	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
24373	);
24374	let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24375	`0b01010101010101010101010101010101`,
24376	a,
24377	);
24378	let e = _mm512_set_ph(
24379	`0.`, `2.`, `0.`, `4.`, `0.`, `6.`, `0.`, `8.`, `0.`, `10.`, `0.`, `12.`, `0.`, `14.`, `0.`, `16.`, `0.`, `18.`, `0.`, `20.`,
24380	`0.`, `22.`, `0.`, `24.`, `0.`, `26.`, `0.`, `28.`, `0.`, `30.`, `0.`, `32.`,
24381	);
24382	assert_eq_m512h(r, e);
24383	}
24384
24385	#[simd_test(enable = "avx512fp16,avx512vl")]
24386	unsafe fn test_mm_cvtepi32_ph() {
24387	let a = _mm_set_epi32(`1`, `2`, `3`, `4`);
24388	let r = _mm_cvtepi32_ph(a);
24389	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
24390	assert_eq_m128h(r, e);
24391	}
24392
24393	#[simd_test(enable = "avx512fp16,avx512vl")]
24394	unsafe fn test_mm_mask_cvtepi32_ph() {
24395	let a = _mm_set_epi32(`1`, `2`, `3`, `4`);
24396	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24397	let r = _mm_mask_cvtepi32_ph(src, `0b0101`, a);
24398	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `14.`, `2.`, `16.`, `4.`);
24399	assert_eq_m128h(r, e);
24400	}
24401
24402	#[simd_test(enable = "avx512fp16,avx512vl")]
24403	unsafe fn test_mm_maskz_cvtepi32_ph() {
24404	let a = _mm_set_epi32(`1`, `2`, `3`, `4`);
24405	let r = _mm_maskz_cvtepi32_ph(`0b0101`, a);
24406	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.`, `0.0`, `4.`);
24407	assert_eq_m128h(r, e);
24408	}
24409
24410	#[simd_test(enable = "avx512fp16,avx512vl")]
24411	unsafe fn test_mm256_cvtepi32_ph() {
24412	let a = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24413	let r = _mm256_cvtepi32_ph(a);
24414	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24415	assert_eq_m128h(r, e);
24416	}
24417
24418	#[simd_test(enable = "avx512fp16,avx512vl")]
24419	unsafe fn test_mm256_mask_cvtepi32_ph() {
24420	let a = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24421	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24422	let r = _mm256_mask_cvtepi32_ph(src, `0b01010101`, a);
24423	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24424	assert_eq_m128h(r, e);
24425	}
24426
24427	#[simd_test(enable = "avx512fp16,avx512vl")]
24428	unsafe fn test_mm256_maskz_cvtepi32_ph() {
24429	let a = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24430	let r = _mm256_maskz_cvtepi32_ph(`0b01010101`, a);
24431	let e = _mm_set_ph(`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`);
24432	assert_eq_m128h(r, e);
24433	}
24434
24435	#[simd_test(enable = "avx512fp16")]
24436	unsafe fn test_mm512_cvtepi32_ph() {
24437	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24438	let r = _mm512_cvtepi32_ph(a);
24439	let e = _mm256_set_ph(
24440	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24441	);
24442	assert_eq_m256h(r, e);
24443	}
24444
24445	#[simd_test(enable = "avx512fp16")]
24446	unsafe fn test_mm512_mask_cvtepi32_ph() {
24447	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24448	let src = _mm256_set_ph(
24449	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24450	);
24451	let r = _mm512_mask_cvtepi32_ph(src, `0b0101010101010101`, a);
24452	let e = _mm256_set_ph(
24453	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`,
24454	);
24455	assert_eq_m256h(r, e);
24456	}
24457
24458	#[simd_test(enable = "avx512fp16")]
24459	unsafe fn test_mm512_maskz_cvtepi32_ph() {
24460	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24461	let r = _mm512_maskz_cvtepi32_ph(`0b0101010101010101`, a);
24462	let e = _mm256_set_ph(
24463	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
24464	);
24465	assert_eq_m256h(r, e);
24466	}
24467
24468	#[simd_test(enable = "avx512fp16")]
24469	unsafe fn test_mm512_cvt_roundepi32_ph() {
24470	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24471	let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24472	let e = _mm256_set_ph(
24473	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24474	);
24475	assert_eq_m256h(r, e);
24476	}
24477
24478	#[simd_test(enable = "avx512fp16")]
24479	unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
24480	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24481	let src = _mm256_set_ph(
24482	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24483	);
24484	let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24485	src,
24486	`0b0101010101010101`,
24487	a,
24488	);
24489	let e = _mm256_set_ph(
24490	`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`, `18.`, `10.`, `20.`, `12.`, `22.`, `14.`, `24.`, `16.`,
24491	);
24492	assert_eq_m256h(r, e);
24493	}
24494
24495	#[simd_test(enable = "avx512fp16")]
24496	unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
24497	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24498	let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24499	`0b0101010101010101`,
24500	a,
24501	);
24502	let e = _mm256_set_ph(
24503	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
24504	);
24505	assert_eq_m256h(r, e);
24506	}
24507
24508	#[simd_test(enable = "avx512fp16")]
24509	unsafe fn test_mm_cvti32_sh() {
24510	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24511	let r = _mm_cvti32_sh(a, `10`);
24512	let e = _mm_setr_ph(`10.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24513	assert_eq_m128h(r, e);
24514	}
24515
24516	#[simd_test(enable = "avx512fp16")]
24517	unsafe fn test_mm_cvt_roundi32_sh() {
24518	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24519	let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, `10`);
24520	let e = _mm_setr_ph(`10.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24521	assert_eq_m128h(r, e);
24522	}
24523
24524	#[simd_test(enable = "avx512fp16,avx512vl")]
24525	unsafe fn test_mm_cvtepu32_ph() {
24526	let a = _mm_set_epi32(`1`, `2`, `3`, `4`);
24527	let r = _mm_cvtepu32_ph(a);
24528	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
24529	assert_eq_m128h(r, e);
24530	}
24531
24532	#[simd_test(enable = "avx512fp16,avx512vl")]
24533	unsafe fn test_mm_mask_cvtepu32_ph() {
24534	let a = _mm_set_epi32(`1`, `2`, `3`, `4`);
24535	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24536	let r = _mm_mask_cvtepu32_ph(src, `0b0101`, a);
24537	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `14.`, `2.`, `16.`, `4.`);
24538	assert_eq_m128h(r, e);
24539	}
24540
24541	#[simd_test(enable = "avx512fp16,avx512vl")]
24542	unsafe fn test_mm_maskz_cvtepu32_ph() {
24543	let a = _mm_set_epi32(`1`, `2`, `3`, `4`);
24544	let r = _mm_maskz_cvtepu32_ph(`0b0101`, a);
24545	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.`, `0.0`, `4.`);
24546	assert_eq_m128h(r, e);
24547	}
24548
24549	#[simd_test(enable = "avx512fp16,avx512vl")]
24550	unsafe fn test_mm256_cvtepu32_ph() {
24551	let a = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24552	let r = _mm256_cvtepu32_ph(a);
24553	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24554	assert_eq_m128h(r, e);
24555	}
24556
24557	#[simd_test(enable = "avx512fp16,avx512vl")]
24558	unsafe fn test_mm256_mask_cvtepu32_ph() {
24559	let a = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24560	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24561	let r = _mm256_mask_cvtepu32_ph(src, `0b01010101`, a);
24562	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24563	assert_eq_m128h(r, e);
24564	}
24565
24566	#[simd_test(enable = "avx512fp16,avx512vl")]
24567	unsafe fn test_mm256_maskz_cvtepu32_ph() {
24568	let a = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24569	let r = _mm256_maskz_cvtepu32_ph(`0b01010101`, a);
24570	let e = _mm_set_ph(`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`);
24571	assert_eq_m128h(r, e);
24572	}
24573
24574	#[simd_test(enable = "avx512fp16")]
24575	unsafe fn test_mm512_cvtepu32_ph() {
24576	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24577	let r = _mm512_cvtepu32_ph(a);
24578	let e = _mm256_set_ph(
24579	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24580	);
24581	assert_eq_m256h(r, e);
24582	}
24583
24584	#[simd_test(enable = "avx512fp16")]
24585	unsafe fn test_mm512_mask_cvtepu32_ph() {
24586	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24587	let src = _mm256_set_ph(
24588	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24589	);
24590	let r = _mm512_mask_cvtepu32_ph(src, `0b0101010101010101`, a);
24591	let e = _mm256_set_ph(
24592	`10.`, `2.0`, `12.`, `4.0`, `14.`, `6.0`, `16.`, `8.0`, `18.`, `10.0`, `20.`, `12.0`, `22.`, `14.0`, `24.`, `16.0`,
24593	);
24594	assert_eq_m256h(r, e);
24595	}
24596
24597	#[simd_test(enable = "avx512fp16")]
24598	unsafe fn test_mm512_maskz_cvtepu32_ph() {
24599	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24600	let r = _mm512_maskz_cvtepu32_ph(`0b0101010101010101`, a);
24601	let e = _mm256_set_ph(
24602	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
24603	);
24604	assert_eq_m256h(r, e);
24605	}
24606
24607	#[simd_test(enable = "avx512fp16")]
24608	unsafe fn test_mm512_cvt_roundepu32_ph() {
24609	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24610	let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24611	let e = _mm256_set_ph(
24612	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24613	);
24614	assert_eq_m256h(r, e);
24615	}
24616
24617	#[simd_test(enable = "avx512fp16")]
24618	unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
24619	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24620	let src = _mm256_set_ph(
24621	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24622	);
24623	let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24624	src,
24625	`0b0101010101010101`,
24626	a,
24627	);
24628	let e = _mm256_set_ph(
24629	`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`, `18.0`, `10.0`, `20.0`, `12.0`, `22.0`, `14.0`, `24.0`,
24630	`16.0`,
24631	);
24632	assert_eq_m256h(r, e);
24633	}
24634
24635	#[simd_test(enable = "avx512fp16")]
24636	unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
24637	let a = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
24638	let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24639	`0b0101010101010101`,
24640	a,
24641	);
24642	let e = _mm256_set_ph(
24643	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
24644	);
24645	assert_eq_m256h(r, e);
24646	}
24647
24648	#[simd_test(enable = "avx512fp16")]
24649	unsafe fn test_mm_cvtu32_sh() {
24650	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24651	let r = _mm_cvtu32_sh(a, `10`);
24652	let e = _mm_setr_ph(`10.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24653	assert_eq_m128h(r, e);
24654	}
24655
24656	#[simd_test(enable = "avx512fp16")]
24657	unsafe fn test_mm_cvt_roundu32_sh() {
24658	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24659	let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, `10`);
24660	let e = _mm_setr_ph(`10.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24661	assert_eq_m128h(r, e);
24662	}
24663
24664	#[simd_test(enable = "avx512fp16,avx512vl")]
24665	unsafe fn test_mm_cvtepi64_ph() {
24666	let a = _mm_set_epi64x(`1`, `2`);
24667	let r = _mm_cvtepi64_ph(a);
24668	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
24669	assert_eq_m128h(r, e);
24670	}
24671
24672	#[simd_test(enable = "avx512fp16,avx512vl")]
24673	unsafe fn test_mm_mask_cvtepi64_ph() {
24674	let a = _mm_set_epi64x(`1`, `2`);
24675	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24676	let r = _mm_mask_cvtepi64_ph(src, `0b01`, a);
24677	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `16.`, `2.`);
24678	assert_eq_m128h(r, e);
24679	}
24680
24681	#[simd_test(enable = "avx512fp16,avx512vl")]
24682	unsafe fn test_mm_maskz_cvtepi64_ph() {
24683	let a = _mm_set_epi64x(`1`, `2`);
24684	let r = _mm_maskz_cvtepi64_ph(`0b01`, a);
24685	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.`);
24686	assert_eq_m128h(r, e);
24687	}
24688
24689	#[simd_test(enable = "avx512fp16,avx512vl")]
24690	unsafe fn test_mm256_cvtepi64_ph() {
24691	let a = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
24692	let r = _mm256_cvtepi64_ph(a);
24693	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
24694	assert_eq_m128h(r, e);
24695	}
24696
24697	#[simd_test(enable = "avx512fp16,avx512vl")]
24698	unsafe fn test_mm256_mask_cvtepi64_ph() {
24699	let a = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
24700	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24701	let r = _mm256_mask_cvtepi64_ph(src, `0b0101`, a);
24702	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `14.`, `2.0`, `16.0`, `4.0`);
24703	assert_eq_m128h(r, e);
24704	}
24705
24706	#[simd_test(enable = "avx512fp16,avx512vl")]
24707	unsafe fn test_mm256_maskz_cvtepi64_ph() {
24708	let a = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
24709	let r = _mm256_maskz_cvtepi64_ph(`0b0101`, a);
24710	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.0`, `0.0`, `4.0`);
24711	assert_eq_m128h(r, e);
24712	}
24713
24714	#[simd_test(enable = "avx512fp16")]
24715	unsafe fn test_mm512_cvtepi64_ph() {
24716	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24717	let r = _mm512_cvtepi64_ph(a);
24718	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24719	assert_eq_m128h(r, e);
24720	}
24721
24722	#[simd_test(enable = "avx512fp16")]
24723	unsafe fn test_mm512_mask_cvtepi64_ph() {
24724	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24725	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24726	let r = _mm512_mask_cvtepi64_ph(src, `0b01010101`, a);
24727	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24728	assert_eq_m128h(r, e);
24729	}
24730
24731	#[simd_test(enable = "avx512fp16")]
24732	unsafe fn test_mm512_maskz_cvtepi64_ph() {
24733	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24734	let r = _mm512_maskz_cvtepi64_ph(`0b01010101`, a);
24735	let e = _mm_set_ph(`0.0`, `2.`, `0.0`, `4.`, `0.0`, `6.`, `0.0`, `8.`);
24736	assert_eq_m128h(r, e);
24737	}
24738
24739	#[simd_test(enable = "avx512fp16")]
24740	unsafe fn test_mm512_cvt_roundepi64_ph() {
24741	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24742	let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24743	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24744	assert_eq_m128h(r, e);
24745	}
24746
24747	#[simd_test(enable = "avx512fp16")]
24748	unsafe fn test_mm512_mask_cvt_roundepi64_ph() {
24749	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24750	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24751	let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24752	src, `0b01010101`, a,
24753	);
24754	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24755	assert_eq_m128h(r, e);
24756	}
24757
24758	#[simd_test(enable = "avx512fp16")]
24759	unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
24760	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24761	let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24762	`0b01010101`, a,
24763	);
24764	let e = _mm_set_ph(`0.0`, `2.`, `0.0`, `4.`, `0.0`, `6.`, `0.0`, `8.`);
24765	assert_eq_m128h(r, e);
24766	}
24767
24768	#[simd_test(enable = "avx512fp16,avx512vl")]
24769	unsafe fn test_mm_cvtepu64_ph() {
24770	let a = _mm_set_epi64x(`1`, `2`);
24771	let r = _mm_cvtepu64_ph(a);
24772	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
24773	assert_eq_m128h(r, e);
24774	}
24775
24776	#[simd_test(enable = "avx512fp16,avx512vl")]
24777	unsafe fn test_mm_mask_cvtepu64_ph() {
24778	let a = _mm_set_epi64x(`1`, `2`);
24779	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24780	let r = _mm_mask_cvtepu64_ph(src, `0b01`, a);
24781	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `16.`, `2.`);
24782	assert_eq_m128h(r, e);
24783	}
24784
24785	#[simd_test(enable = "avx512fp16,avx512vl")]
24786	unsafe fn test_mm_maskz_cvtepu64_ph() {
24787	let a = _mm_set_epi64x(`1`, `2`);
24788	let r = _mm_maskz_cvtepu64_ph(`0b01`, a);
24789	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.0`);
24790	assert_eq_m128h(r, e);
24791	}
24792
24793	#[simd_test(enable = "avx512fp16,avx512vl")]
24794	unsafe fn test_mm256_cvtepu64_ph() {
24795	let a = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
24796	let r = _mm256_cvtepu64_ph(a);
24797	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
24798	assert_eq_m128h(r, e);
24799	}
24800
24801	#[simd_test(enable = "avx512fp16,avx512vl")]
24802	unsafe fn test_mm256_mask_cvtepu64_ph() {
24803	let a = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
24804	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24805	let r = _mm256_mask_cvtepu64_ph(src, `0b0101`, a);
24806	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `14.`, `2.0`, `16.0`, `4.0`);
24807	assert_eq_m128h(r, e);
24808	}
24809
24810	#[simd_test(enable = "avx512fp16,avx512vl")]
24811	unsafe fn test_mm256_maskz_cvtepu64_ph() {
24812	let a = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
24813	let r = _mm256_maskz_cvtepu64_ph(`0b0101`, a);
24814	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.0`, `0.0`, `4.0`);
24815	assert_eq_m128h(r, e);
24816	}
24817
24818	#[simd_test(enable = "avx512fp16")]
24819	unsafe fn test_mm512_cvtepu64_ph() {
24820	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24821	let r = _mm512_cvtepu64_ph(a);
24822	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24823	assert_eq_m128h(r, e);
24824	}
24825
24826	#[simd_test(enable = "avx512fp16")]
24827	unsafe fn test_mm512_mask_cvtepu64_ph() {
24828	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24829	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24830	let r = _mm512_mask_cvtepu64_ph(src, `0b01010101`, a);
24831	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24832	assert_eq_m128h(r, e);
24833	}
24834
24835	#[simd_test(enable = "avx512fp16")]
24836	unsafe fn test_mm512_maskz_cvtepu64_ph() {
24837	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24838	let r = _mm512_maskz_cvtepu64_ph(`0b01010101`, a);
24839	let e = _mm_set_ph(`0.0`, `2.`, `0.0`, `4.`, `0.0`, `6.`, `0.0`, `8.`);
24840	assert_eq_m128h(r, e);
24841	}
24842
24843	#[simd_test(enable = "avx512fp16")]
24844	unsafe fn test_mm512_cvt_roundepu64_ph() {
24845	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24846	let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24847	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24848	assert_eq_m128h(r, e);
24849	}
24850
24851	#[simd_test(enable = "avx512fp16")]
24852	unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
24853	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24854	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24855	let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24856	src, `0b01010101`, a,
24857	);
24858	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24859	assert_eq_m128h(r, e);
24860	}
24861
24862	#[simd_test(enable = "avx512fp16")]
24863	unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
24864	let a = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
24865	let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24866	`0b01010101`, a,
24867	);
24868	let e = _mm_set_ph(`0.0`, `2.`, `0.0`, `4.`, `0.0`, `6.`, `0.0`, `8.`);
24869	assert_eq_m128h(r, e);
24870	}
24871
24872	#[simd_test(enable = "avx512fp16,avx512vl")]
24873	unsafe fn test_mm_cvtxps_ph() {
24874	let a = _mm_set_ps(`1.0`, `2.0`, `3.0`, `4.0`);
24875	let r = _mm_cvtxps_ph(a);
24876	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
24877	assert_eq_m128h(r, e);
24878	}
24879
24880	#[simd_test(enable = "avx512fp16,avx512vl")]
24881	unsafe fn test_mm_mask_cvtxps_ph() {
24882	let a = _mm_set_ps(`1.0`, `2.0`, `3.0`, `4.0`);
24883	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24884	let r = _mm_mask_cvtxps_ph(src, `0b0101`, a);
24885	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `14.`, `2.0`, `16.`, `4.0`);
24886	assert_eq_m128h(r, e);
24887	}
24888
24889	#[simd_test(enable = "avx512fp16,avx512vl")]
24890	unsafe fn test_mm_maskz_cvtxps_ph() {
24891	let a = _mm_set_ps(`1.0`, `2.0`, `3.0`, `4.0`);
24892	let r = _mm_maskz_cvtxps_ph(`0b0101`, a);
24893	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.0`, `0.0`, `4.0`);
24894	assert_eq_m128h(r, e);
24895	}
24896
24897	#[simd_test(enable = "avx512fp16,avx512vl")]
24898	unsafe fn test_mm256_cvtxps_ph() {
24899	let a = _mm256_set_ps(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24900	let r = _mm256_cvtxps_ph(a);
24901	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24902	assert_eq_m128h(r, e);
24903	}
24904
24905	#[simd_test(enable = "avx512fp16,avx512vl")]
24906	unsafe fn test_mm256_mask_cvtxps_ph() {
24907	let a = _mm256_set_ps(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24908	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
24909	let r = _mm256_mask_cvtxps_ph(src, `0b01010101`, a);
24910	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
24911	assert_eq_m128h(r, e);
24912	}
24913
24914	#[simd_test(enable = "avx512fp16,avx512vl")]
24915	unsafe fn test_mm256_maskz_cvtxps_ph() {
24916	let a = _mm256_set_ps(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
24917	let r = _mm256_maskz_cvtxps_ph(`0b01010101`, a);
24918	let e = _mm_set_ph(`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`);
24919	assert_eq_m128h(r, e);
24920	}
24921
24922	#[simd_test(enable = "avx512fp16")]
24923	unsafe fn test_mm512_cvtxps_ph() {
24924	let a = _mm512_set_ps(
24925	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24926	);
24927	let r = _mm512_cvtxps_ph(a);
24928	let e = _mm256_set_ph(
24929	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24930	);
24931	assert_eq_m256h(r, e);
24932	}
24933
24934	#[simd_test(enable = "avx512fp16")]
24935	unsafe fn test_mm512_mask_cvtxps_ph() {
24936	let a = _mm512_set_ps(
24937	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24938	);
24939	let src = _mm256_set_ph(
24940	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24941	);
24942	let r = _mm512_mask_cvtxps_ph(src, `0b0101010101010101`, a);
24943	let e = _mm256_set_ph(
24944	`10.`, `2.0`, `12.`, `4.0`, `14.`, `6.0`, `16.`, `8.0`, `18.`, `10.0`, `20.`, `12.0`, `22.`, `14.0`, `24.`, `16.0`,
24945	);
24946	assert_eq_m256h(r, e);
24947	}
24948
24949	#[simd_test(enable = "avx512fp16")]
24950	unsafe fn test_mm512_maskz_cvtxps_ph() {
24951	let a = _mm512_set_ps(
24952	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24953	);
24954	let r = _mm512_maskz_cvtxps_ph(`0b0101010101010101`, a);
24955	let e = _mm256_set_ph(
24956	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
24957	);
24958	assert_eq_m256h(r, e);
24959	}
24960
24961	#[simd_test(enable = "avx512fp16")]
24962	unsafe fn test_mm512_cvtx_roundps_ph() {
24963	let a = _mm512_set_ps(
24964	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24965	);
24966	let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
24967	let e = _mm256_set_ph(
24968	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24969	);
24970	assert_eq_m256h(r, e);
24971	}
24972
24973	#[simd_test(enable = "avx512fp16")]
24974	unsafe fn test_mm512_mask_cvtx_roundps_ph() {
24975	let a = _mm512_set_ps(
24976	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24977	);
24978	let src = _mm256_set_ph(
24979	`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`, `18.`, `19.`, `20.`, `21.`, `22.`, `23.`, `24.`, `25.`,
24980	);
24981	let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24982	src,
24983	`0b0101010101010101`,
24984	a,
24985	);
24986	let e = _mm256_set_ph(
24987	`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`, `18.0`, `10.0`, `20.0`, `12.0`, `22.0`, `14.0`, `24.0`,
24988	`16.0`,
24989	);
24990	assert_eq_m256h(r, e);
24991	}
24992
24993	#[simd_test(enable = "avx512fp16")]
24994	unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
24995	let a = _mm512_set_ps(
24996	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
24997	);
24998	let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
24999	`0b0101010101010101`,
25000	a,
25001	);
25002	let e = _mm256_set_ph(
25003	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
25004	);
25005	assert_eq_m256h(r, e);
25006	}
25007
25008	#[simd_test(enable = "avx512fp16")]
25009	unsafe fn test_mm_cvtss_sh() {
25010	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25011	let b = _mm_setr_ps(`1.0`, `2.0`, `3.0`, `4.0`);
25012	let r = _mm_cvtss_sh(a, b);
25013	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25014	assert_eq_m128h(r, e);
25015	}
25016
25017	#[simd_test(enable = "avx512fp16")]
25018	unsafe fn test_mm_mask_cvtss_sh() {
25019	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25020	let b = _mm_setr_ps(`1.0`, `2.0`, `3.0`, `4.0`);
25021	let src = _mm_setr_ph(`20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`);
25022	let r = _mm_mask_cvtss_sh(src, `0`, a, b);
25023	let e = _mm_setr_ph(`20.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25024	assert_eq_m128h(r, e);
25025	let r = _mm_mask_cvtss_sh(src, `1`, a, b);
25026	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25027	assert_eq_m128h(r, e);
25028	}
25029
25030	#[simd_test(enable = "avx512fp16")]
25031	unsafe fn test_mm_maskz_cvtss_sh() {
25032	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25033	let b = _mm_setr_ps(`1.0`, `2.0`, `3.0`, `4.0`);
25034	let r = _mm_maskz_cvtss_sh(`0`, a, b);
25035	let e = _mm_setr_ph(`0.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25036	assert_eq_m128h(r, e);
25037	let r = _mm_maskz_cvtss_sh(`1`, a, b);
25038	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25039	assert_eq_m128h(r, e);
25040	}
25041
25042	#[simd_test(enable = "avx512fp16")]
25043	unsafe fn test_mm_cvt_roundss_sh() {
25044	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25045	let b = _mm_setr_ps(`1.0`, `2.0`, `3.0`, `4.0`);
25046	let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
25047	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25048	assert_eq_m128h(r, e);
25049	}
25050
25051	#[simd_test(enable = "avx512fp16")]
25052	unsafe fn test_mm_mask_cvt_roundss_sh() {
25053	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25054	let b = _mm_setr_ps(`1.0`, `2.0`, `3.0`, `4.0`);
25055	let src = _mm_setr_ph(`20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`);
25056	let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25057	src, `0`, a, b,
25058	);
25059	let e = _mm_setr_ph(`20.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25060	assert_eq_m128h(r, e);
25061	let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25062	src, `1`, a, b,
25063	);
25064	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25065	assert_eq_m128h(r, e);
25066	}
25067
25068	#[simd_test(enable = "avx512fp16")]
25069	unsafe fn test_mm_maskz_cvt_roundss_sh() {
25070	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25071	let b = _mm_setr_ps(`1.0`, `2.0`, `3.0`, `4.0`);
25072	let r =
25073	_mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
25074	let e = _mm_setr_ph(`0.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25075	assert_eq_m128h(r, e);
25076	let r =
25077	_mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
25078	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25079	assert_eq_m128h(r, e);
25080	}
25081
25082	#[simd_test(enable = "avx512fp16,avx512vl")]
25083	unsafe fn test_mm_cvtpd_ph() {
25084	let a = _mm_set_pd(`1.0`, `2.0`);
25085	let r = _mm_cvtpd_ph(a);
25086	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
25087	assert_eq_m128h(r, e);
25088	}
25089
25090	#[simd_test(enable = "avx512fp16,avx512vl")]
25091	unsafe fn test_mm_mask_cvtpd_ph() {
25092	let a = _mm_set_pd(`1.0`, `2.0`);
25093	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25094	let r = _mm_mask_cvtpd_ph(src, `0b01`, a);
25095	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `16.`, `2.`);
25096	assert_eq_m128h(r, e);
25097	}
25098
25099	#[simd_test(enable = "avx512fp16,avx512vl")]
25100	unsafe fn test_mm_maskz_cvtpd_ph() {
25101	let a = _mm_set_pd(`1.0`, `2.0`);
25102	let r = _mm_maskz_cvtpd_ph(`0b01`, a);
25103	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.0`);
25104	assert_eq_m128h(r, e);
25105	}
25106
25107	#[simd_test(enable = "avx512fp16,avx512vl")]
25108	unsafe fn test_mm256_cvtpd_ph() {
25109	let a = _mm256_set_pd(`1.0`, `2.0`, `3.0`, `4.0`);
25110	let r = _mm256_cvtpd_ph(a);
25111	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
25112	assert_eq_m128h(r, e);
25113	}
25114
25115	#[simd_test(enable = "avx512fp16,avx512vl")]
25116	unsafe fn test_mm256_mask_cvtpd_ph() {
25117	let a = _mm256_set_pd(`1.0`, `2.0`, `3.0`, `4.0`);
25118	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25119	let r = _mm256_mask_cvtpd_ph(src, `0b0101`, a);
25120	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `14.`, `2.0`, `16.0`, `4.0`);
25121	assert_eq_m128h(r, e);
25122	}
25123
25124	#[simd_test(enable = "avx512fp16,avx512vl")]
25125	unsafe fn test_mm256_maskz_cvtpd_ph() {
25126	let a = _mm256_set_pd(`1.0`, `2.0`, `3.0`, `4.0`);
25127	let r = _mm256_maskz_cvtpd_ph(`0b0101`, a);
25128	let e = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `2.0`, `0.0`, `4.0`);
25129	assert_eq_m128h(r, e);
25130	}
25131
25132	#[simd_test(enable = "avx512fp16")]
25133	unsafe fn test_mm512_cvtpd_ph() {
25134	let a = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25135	let r = _mm512_cvtpd_ph(a);
25136	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25137	assert_eq_m128h(r, e);
25138	}
25139
25140	#[simd_test(enable = "avx512fp16")]
25141	unsafe fn test_mm512_mask_cvtpd_ph() {
25142	let a = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25143	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25144	let r = _mm512_mask_cvtpd_ph(src, `0b01010101`, a);
25145	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
25146	assert_eq_m128h(r, e);
25147	}
25148
25149	#[simd_test(enable = "avx512fp16")]
25150	unsafe fn test_mm512_maskz_cvtpd_ph() {
25151	let a = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25152	let r = _mm512_maskz_cvtpd_ph(`0b01010101`, a);
25153	let e = _mm_set_ph(`0.0`, `2.`, `0.0`, `4.`, `0.0`, `6.`, `0.0`, `8.`);
25154	assert_eq_m128h(r, e);
25155	}
25156
25157	#[simd_test(enable = "avx512fp16")]
25158	unsafe fn test_mm512_cvt_roundpd_ph() {
25159	let a = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25160	let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
25161	let e = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25162	assert_eq_m128h(r, e);
25163	}
25164
25165	#[simd_test(enable = "avx512fp16")]
25166	unsafe fn test_mm512_mask_cvt_roundpd_ph() {
25167	let a = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25168	let src = _mm_set_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25169	let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25170	src, `0b01010101`, a,
25171	);
25172	let e = _mm_set_ph(`10.`, `2.`, `12.`, `4.`, `14.`, `6.`, `16.`, `8.`);
25173	assert_eq_m128h(r, e);
25174	}
25175
25176	#[simd_test(enable = "avx512fp16")]
25177	unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
25178	let a = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25179	let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25180	`0b01010101`, a,
25181	);
25182	let e = _mm_set_ph(`0.0`, `2.`, `0.0`, `4.`, `0.0`, `6.`, `0.0`, `8.`);
25183	assert_eq_m128h(r, e);
25184	}
25185
25186	#[simd_test(enable = "avx512fp16")]
25187	unsafe fn test_mm_cvtsd_sh() {
25188	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25189	let b = _mm_setr_pd(`1.0`, `2.0`);
25190	let r = _mm_cvtsd_sh(a, b);
25191	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25192	assert_eq_m128h(r, e);
25193	}
25194
25195	#[simd_test(enable = "avx512fp16")]
25196	unsafe fn test_mm_mask_cvtsd_sh() {
25197	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25198	let b = _mm_setr_pd(`1.0`, `2.0`);
25199	let src = _mm_setr_ph(`20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`);
25200	let r = _mm_mask_cvtsd_sh(src, `0`, a, b);
25201	let e = _mm_setr_ph(`20.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25202	assert_eq_m128h(r, e);
25203	let r = _mm_mask_cvtsd_sh(src, `1`, a, b);
25204	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25205	assert_eq_m128h(r, e);
25206	}
25207
25208	#[simd_test(enable = "avx512fp16")]
25209	unsafe fn test_mm_maskz_cvtsd_sh() {
25210	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25211	let b = _mm_setr_pd(`1.0`, `2.0`);
25212	let r = _mm_maskz_cvtsd_sh(`0`, a, b);
25213	let e = _mm_setr_ph(`0.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25214	assert_eq_m128h(r, e);
25215	let r = _mm_maskz_cvtsd_sh(`1`, a, b);
25216	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25217	assert_eq_m128h(r, e);
25218	}
25219
25220	#[simd_test(enable = "avx512fp16")]
25221	unsafe fn test_mm_cvt_roundsd_sh() {
25222	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25223	let b = _mm_setr_pd(`1.0`, `2.0`);
25224	let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a, b);
25225	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25226	assert_eq_m128h(r, e);
25227	}
25228
25229	#[simd_test(enable = "avx512fp16")]
25230	unsafe fn test_mm_mask_cvt_roundsd_sh() {
25231	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25232	let b = _mm_setr_pd(`1.0`, `2.0`);
25233	let src = _mm_setr_ph(`20.`, `21.`, `22.`, `23.`, `24.`, `25.`, `26.`, `27.`);
25234	let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25235	src, `0`, a, b,
25236	);
25237	let e = _mm_setr_ph(`20.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25238	assert_eq_m128h(r, e);
25239	let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25240	src, `1`, a, b,
25241	);
25242	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25243	assert_eq_m128h(r, e);
25244	}
25245
25246	#[simd_test(enable = "avx512fp16")]
25247	unsafe fn test_mm_maskz_cvt_roundsd_sh() {
25248	let a = _mm_setr_ph(`10.`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25249	let b = _mm_setr_pd(`1.0`, `2.0`);
25250	let r =
25251	_mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`0`, a, b);
25252	let e = _mm_setr_ph(`0.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25253	assert_eq_m128h(r, e);
25254	let r =
25255	_mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(`1`, a, b);
25256	let e = _mm_setr_ph(`1.0`, `11.`, `12.`, `13.`, `14.`, `15.`, `16.`, `17.`);
25257	assert_eq_m128h(r, e);
25258	}
25259
25260	#[simd_test(enable = "avx512fp16,avx512vl")]
25261	unsafe fn test_mm_cvtph_epi16() {
25262	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25263	let r = _mm_cvttph_epi16(a);
25264	let e = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
25265	assert_eq_m128i(r, e);
25266	}
25267
25268	#[simd_test(enable = "avx512fp16,avx512vl")]
25269	unsafe fn test_mm_mask_cvtph_epi16() {
25270	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25271	let src = _mm_set_epi16(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
25272	let r = _mm_mask_cvttph_epi16(src, `0b01010101`, a);
25273	let e = _mm_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
25274	assert_eq_m128i(r, e);
25275	}
25276
25277	#[simd_test(enable = "avx512fp16,avx512vl")]
25278	unsafe fn test_mm_maskz_cvtph_epi16() {
25279	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25280	let r = _mm_maskz_cvttph_epi16(`0b01010101`, a);
25281	let e = _mm_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
25282	assert_eq_m128i(r, e);
25283	}
25284
25285	#[simd_test(enable = "avx512fp16,avx512vl")]
25286	unsafe fn test_mm256_cvtph_epi16() {
25287	let a = _mm256_set_ph(
25288	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25289	);
25290	let r = _mm256_cvttph_epi16(a);
25291	let e = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
25292	assert_eq_m256i(r, e);
25293	}
25294
25295	#[simd_test(enable = "avx512fp16,avx512vl")]
25296	unsafe fn test_mm256_mask_cvtph_epi16() {
25297	let a = _mm256_set_ph(
25298	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25299	);
25300	let src = _mm256_set_epi16(
25301	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
25302	);
25303	let r = _mm256_mask_cvttph_epi16(src, `0b0101010101010101`, a);
25304	let e = _mm256_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
25305	assert_eq_m256i(r, e);
25306	}
25307
25308	#[simd_test(enable = "avx512fp16,avx512vl")]
25309	unsafe fn test_mm256_maskz_cvtph_epi16() {
25310	let a = _mm256_set_ph(
25311	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25312	);
25313	let r = _mm256_maskz_cvttph_epi16(`0b0101010101010101`, a);
25314	let e = _mm256_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
25315	assert_eq_m256i(r, e);
25316	}
25317
25318	#[simd_test(enable = "avx512fp16")]
25319	unsafe fn test_mm512_cvtph_epi16() {
25320	let a = _mm512_set_ph(
25321	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25322	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25323	`31.0`, `32.0`,
25324	);
25325	let r = _mm512_cvttph_epi16(a);
25326	let e = _mm512_set_epi16(
25327	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25328	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25329	);
25330	assert_eq_m512i(r, e);
25331	}
25332
25333	#[simd_test(enable = "avx512fp16")]
25334	unsafe fn test_mm512_mask_cvtph_epi16() {
25335	let a = _mm512_set_ph(
25336	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25337	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25338	`31.0`, `32.0`,
25339	);
25340	let src = _mm512_set_epi16(
25341	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25342	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25343	);
25344	let r = _mm512_mask_cvttph_epi16(src, `0b01010101010101010101010101010101`, a);
25345	let e = _mm512_set_epi16(
25346	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25347	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25348	);
25349	assert_eq_m512i(r, e);
25350	}
25351
25352	#[simd_test(enable = "avx512fp16")]
25353	unsafe fn test_mm512_maskz_cvtph_epi16() {
25354	let a = _mm512_set_ph(
25355	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25356	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25357	`31.0`, `32.0`,
25358	);
25359	let r = _mm512_maskz_cvttph_epi16(`0b01010101010101010101010101010101`, a);
25360	let e = _mm512_set_epi16(
25361	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25362	`0`, `28`, `0`, `30`, `0`, `32`,
25363	);
25364	assert_eq_m512i(r, e);
25365	}
25366
25367	#[simd_test(enable = "avx512fp16")]
25368	unsafe fn test_mm512_cvt_roundph_epi16() {
25369	let a = _mm512_set_ph(
25370	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25371	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25372	`31.0`, `32.0`,
25373	);
25374	let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
25375	let e = _mm512_set_epi16(
25376	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25377	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25378	);
25379	assert_eq_m512i(r, e);
25380	}
25381
25382	#[simd_test(enable = "avx512fp16")]
25383	unsafe fn test_mm512_mask_cvt_roundph_epi16() {
25384	let a = _mm512_set_ph(
25385	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25386	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25387	`31.0`, `32.0`,
25388	);
25389	let src = _mm512_set_epi16(
25390	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25391	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25392	);
25393	let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25394	src,
25395	`0b01010101010101010101010101010101`,
25396	a,
25397	);
25398	let e = _mm512_set_epi16(
25399	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25400	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25401	);
25402	assert_eq_m512i(r, e);
25403	}
25404
25405	#[simd_test(enable = "avx512fp16")]
25406	unsafe fn test_mm512_maskz_cvt_roundph_epi16() {
25407	let a = _mm512_set_ph(
25408	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25409	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25410	`31.0`, `32.0`,
25411	);
25412	let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25413	`0b01010101010101010101010101010101`,
25414	a,
25415	);
25416	let e = _mm512_set_epi16(
25417	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25418	`0`, `28`, `0`, `30`, `0`, `32`,
25419	);
25420	assert_eq_m512i(r, e);
25421	}
25422
25423	#[simd_test(enable = "avx512fp16,avx512vl")]
25424	unsafe fn test_mm_cvtph_epu16() {
25425	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25426	let r = _mm_cvttph_epu16(a);
25427	let e = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
25428	assert_eq_m128i(r, e);
25429	}
25430
25431	#[simd_test(enable = "avx512fp16,avx512vl")]
25432	unsafe fn test_mm_mask_cvtph_epu16() {
25433	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25434	let src = _mm_set_epi16(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
25435	let r = _mm_mask_cvttph_epu16(src, `0b01010101`, a);
25436	let e = _mm_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
25437	assert_eq_m128i(r, e);
25438	}
25439
25440	#[simd_test(enable = "avx512fp16,avx512vl")]
25441	unsafe fn test_mm_maskz_cvtph_epu16() {
25442	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25443	let r = _mm_maskz_cvttph_epu16(`0b01010101`, a);
25444	let e = _mm_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
25445	assert_eq_m128i(r, e);
25446	}
25447
25448	#[simd_test(enable = "avx512fp16,avx512vl")]
25449	unsafe fn test_mm256_cvtph_epu16() {
25450	let a = _mm256_set_ph(
25451	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25452	);
25453	let r = _mm256_cvttph_epu16(a);
25454	let e = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
25455	assert_eq_m256i(r, e);
25456	}
25457
25458	#[simd_test(enable = "avx512fp16,avx512vl")]
25459	unsafe fn test_mm256_mask_cvtph_epu16() {
25460	let a = _mm256_set_ph(
25461	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25462	);
25463	let src = _mm256_set_epi16(
25464	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
25465	);
25466	let r = _mm256_mask_cvttph_epu16(src, `0b0101010101010101`, a);
25467	let e = _mm256_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
25468	assert_eq_m256i(r, e);
25469	}
25470
25471	#[simd_test(enable = "avx512fp16,avx512vl")]
25472	unsafe fn test_mm256_maskz_cvtph_epu16() {
25473	let a = _mm256_set_ph(
25474	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25475	);
25476	let r = _mm256_maskz_cvttph_epu16(`0b0101010101010101`, a);
25477	let e = _mm256_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
25478	assert_eq_m256i(r, e);
25479	}
25480
25481	#[simd_test(enable = "avx512fp16")]
25482	unsafe fn test_mm512_cvtph_epu16() {
25483	let a = _mm512_set_ph(
25484	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25485	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25486	`31.0`, `32.0`,
25487	);
25488	let r = _mm512_cvttph_epu16(a);
25489	let e = _mm512_set_epi16(
25490	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25491	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25492	);
25493	assert_eq_m512i(r, e);
25494	}
25495
25496	#[simd_test(enable = "avx512fp16")]
25497	unsafe fn test_mm512_mask_cvtph_epu16() {
25498	let a = _mm512_set_ph(
25499	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25500	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25501	`31.0`, `32.0`,
25502	);
25503	let src = _mm512_set_epi16(
25504	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25505	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25506	);
25507	let r = _mm512_mask_cvttph_epu16(src, `0b01010101010101010101010101010101`, a);
25508	let e = _mm512_set_epi16(
25509	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25510	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25511	);
25512	assert_eq_m512i(r, e);
25513	}
25514
25515	#[simd_test(enable = "avx512fp16")]
25516	unsafe fn test_mm512_maskz_cvtph_epu16() {
25517	let a = _mm512_set_ph(
25518	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25519	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25520	`31.0`, `32.0`,
25521	);
25522	let r = _mm512_maskz_cvttph_epu16(`0b01010101010101010101010101010101`, a);
25523	let e = _mm512_set_epi16(
25524	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25525	`0`, `28`, `0`, `30`, `0`, `32`,
25526	);
25527	assert_eq_m512i(r, e);
25528	}
25529
25530	#[simd_test(enable = "avx512fp16")]
25531	unsafe fn test_mm512_cvt_roundph_epu16() {
25532	let a = _mm512_set_ph(
25533	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25534	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25535	`31.0`, `32.0`,
25536	);
25537	let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
25538	let e = _mm512_set_epi16(
25539	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25540	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25541	);
25542	assert_eq_m512i(r, e);
25543	}
25544
25545	#[simd_test(enable = "avx512fp16")]
25546	unsafe fn test_mm512_mask_cvt_roundph_epu16() {
25547	let a = _mm512_set_ph(
25548	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25549	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25550	`31.0`, `32.0`,
25551	);
25552	let src = _mm512_set_epi16(
25553	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25554	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25555	);
25556	let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25557	src,
25558	`0b01010101010101010101010101010101`,
25559	a,
25560	);
25561	let e = _mm512_set_epi16(
25562	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25563	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25564	);
25565	assert_eq_m512i(r, e);
25566	}
25567
25568	#[simd_test(enable = "avx512fp16")]
25569	unsafe fn test_mm512_maskz_cvt_roundph_epu16() {
25570	let a = _mm512_set_ph(
25571	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25572	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25573	`31.0`, `32.0`,
25574	);
25575	let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
25576	`0b01010101010101010101010101010101`,
25577	a,
25578	);
25579	let e = _mm512_set_epi16(
25580	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25581	`0`, `28`, `0`, `30`, `0`, `32`,
25582	);
25583	assert_eq_m512i(r, e);
25584	}
25585
25586	#[simd_test(enable = "avx512fp16,avx512vl")]
25587	unsafe fn test_mm_cvttph_epi16() {
25588	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25589	let r = _mm_cvttph_epi16(a);
25590	let e = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
25591	assert_eq_m128i(r, e);
25592	}
25593
25594	#[simd_test(enable = "avx512fp16,avx512vl")]
25595	unsafe fn test_mm_mask_cvttph_epi16() {
25596	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25597	let src = _mm_set_epi16(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
25598	let r = _mm_mask_cvttph_epi16(src, `0b01010101`, a);
25599	let e = _mm_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
25600	assert_eq_m128i(r, e);
25601	}
25602
25603	#[simd_test(enable = "avx512fp16,avx512vl")]
25604	unsafe fn test_mm_maskz_cvttph_epi16() {
25605	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25606	let r = _mm_maskz_cvttph_epi16(`0b01010101`, a);
25607	let e = _mm_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
25608	assert_eq_m128i(r, e);
25609	}
25610
25611	#[simd_test(enable = "avx512fp16,avx512vl")]
25612	unsafe fn test_mm256_cvttph_epi16() {
25613	let a = _mm256_set_ph(
25614	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25615	);
25616	let r = _mm256_cvttph_epi16(a);
25617	let e = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
25618	assert_eq_m256i(r, e);
25619	}
25620
25621	#[simd_test(enable = "avx512fp16,avx512vl")]
25622	unsafe fn test_mm256_mask_cvttph_epi16() {
25623	let a = _mm256_set_ph(
25624	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25625	);
25626	let src = _mm256_set_epi16(
25627	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
25628	);
25629	let r = _mm256_mask_cvttph_epi16(src, `0b0101010101010101`, a);
25630	let e = _mm256_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
25631	assert_eq_m256i(r, e);
25632	}
25633
25634	#[simd_test(enable = "avx512fp16,avx512vl")]
25635	unsafe fn test_mm256_maskz_cvttph_epi16() {
25636	let a = _mm256_set_ph(
25637	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25638	);
25639	let r = _mm256_maskz_cvttph_epi16(`0b0101010101010101`, a);
25640	let e = _mm256_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
25641	assert_eq_m256i(r, e);
25642	}
25643
25644	#[simd_test(enable = "avx512fp16")]
25645	unsafe fn test_mm512_cvttph_epi16() {
25646	let a = _mm512_set_ph(
25647	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25648	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25649	`31.0`, `32.0`,
25650	);
25651	let r = _mm512_cvttph_epi16(a);
25652	let e = _mm512_set_epi16(
25653	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25654	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25655	);
25656	assert_eq_m512i(r, e);
25657	}
25658
25659	#[simd_test(enable = "avx512fp16")]
25660	unsafe fn test_mm512_mask_cvttph_epi16() {
25661	let a = _mm512_set_ph(
25662	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25663	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25664	`31.0`, `32.0`,
25665	);
25666	let src = _mm512_set_epi16(
25667	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25668	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25669	);
25670	let r = _mm512_mask_cvttph_epi16(src, `0b01010101010101010101010101010101`, a);
25671	let e = _mm512_set_epi16(
25672	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25673	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25674	);
25675	assert_eq_m512i(r, e);
25676	}
25677
25678	#[simd_test(enable = "avx512fp16")]
25679	unsafe fn test_mm512_maskz_cvttph_epi16() {
25680	let a = _mm512_set_ph(
25681	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25682	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25683	`31.0`, `32.0`,
25684	);
25685	let r = _mm512_maskz_cvttph_epi16(`0b01010101010101010101010101010101`, a);
25686	let e = _mm512_set_epi16(
25687	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25688	`0`, `28`, `0`, `30`, `0`, `32`,
25689	);
25690	assert_eq_m512i(r, e);
25691	}
25692
25693	#[simd_test(enable = "avx512fp16")]
25694	unsafe fn test_mm512_cvtt_roundph_epi16() {
25695	let a = _mm512_set_ph(
25696	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25697	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25698	`31.0`, `32.0`,
25699	);
25700	let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
25701	let e = _mm512_set_epi16(
25702	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25703	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25704	);
25705	assert_eq_m512i(r, e);
25706	}
25707
25708	#[simd_test(enable = "avx512fp16")]
25709	unsafe fn test_mm512_mask_cvtt_roundph_epi16() {
25710	let a = _mm512_set_ph(
25711	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25712	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25713	`31.0`, `32.0`,
25714	);
25715	let src = _mm512_set_epi16(
25716	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25717	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25718	);
25719	let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25720	src,
25721	`0b01010101010101010101010101010101`,
25722	a,
25723	);
25724	let e = _mm512_set_epi16(
25725	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25726	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25727	);
25728	assert_eq_m512i(r, e);
25729	}
25730
25731	#[simd_test(enable = "avx512fp16")]
25732	unsafe fn test_mm512_maskz_cvtt_roundph_epi16() {
25733	let a = _mm512_set_ph(
25734	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25735	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25736	`31.0`, `32.0`,
25737	);
25738	let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25739	`0b01010101010101010101010101010101`,
25740	a,
25741	);
25742	let e = _mm512_set_epi16(
25743	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25744	`0`, `28`, `0`, `30`, `0`, `32`,
25745	);
25746	assert_eq_m512i(r, e);
25747	}
25748
25749	#[simd_test(enable = "avx512fp16,avx512vl")]
25750	unsafe fn test_mm_cvttph_epu16() {
25751	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25752	let r = _mm_cvttph_epu16(a);
25753	let e = _mm_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
25754	assert_eq_m128i(r, e);
25755	}
25756
25757	#[simd_test(enable = "avx512fp16,avx512vl")]
25758	unsafe fn test_mm_mask_cvttph_epu16() {
25759	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25760	let src = _mm_set_epi16(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
25761	let r = _mm_mask_cvttph_epu16(src, `0b01010101`, a);
25762	let e = _mm_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
25763	assert_eq_m128i(r, e);
25764	}
25765
25766	#[simd_test(enable = "avx512fp16,avx512vl")]
25767	unsafe fn test_mm_maskz_cvttph_epu16() {
25768	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25769	let r = _mm_maskz_cvttph_epu16(`0b01010101`, a);
25770	let e = _mm_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
25771	assert_eq_m128i(r, e);
25772	}
25773
25774	#[simd_test(enable = "avx512fp16,avx512vl")]
25775	unsafe fn test_mm256_cvttph_epu16() {
25776	let a = _mm256_set_ph(
25777	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25778	);
25779	let r = _mm256_cvttph_epu16(a);
25780	let e = _mm256_set_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
25781	assert_eq_m256i(r, e);
25782	}
25783
25784	#[simd_test(enable = "avx512fp16,avx512vl")]
25785	unsafe fn test_mm256_mask_cvttph_epu16() {
25786	let a = _mm256_set_ph(
25787	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25788	);
25789	let src = _mm256_set_epi16(
25790	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
25791	);
25792	let r = _mm256_mask_cvttph_epu16(src, `0b0101010101010101`, a);
25793	let e = _mm256_set_epi16(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
25794	assert_eq_m256i(r, e);
25795	}
25796
25797	#[simd_test(enable = "avx512fp16,avx512vl")]
25798	unsafe fn test_mm256_maskz_cvttph_epu16() {
25799	let a = _mm256_set_ph(
25800	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25801	);
25802	let r = _mm256_maskz_cvttph_epu16(`0b0101010101010101`, a);
25803	let e = _mm256_set_epi16(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
25804	assert_eq_m256i(r, e);
25805	}
25806
25807	#[simd_test(enable = "avx512fp16")]
25808	unsafe fn test_mm512_cvttph_epu16() {
25809	let a = _mm512_set_ph(
25810	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25811	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25812	`31.0`, `32.0`,
25813	);
25814	let r = _mm512_cvttph_epu16(a);
25815	let e = _mm512_set_epi16(
25816	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25817	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25818	);
25819	assert_eq_m512i(r, e);
25820	}
25821
25822	#[simd_test(enable = "avx512fp16")]
25823	unsafe fn test_mm512_mask_cvttph_epu16() {
25824	let a = _mm512_set_ph(
25825	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25826	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25827	`31.0`, `32.0`,
25828	);
25829	let src = _mm512_set_epi16(
25830	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25831	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25832	);
25833	let r = _mm512_mask_cvttph_epu16(src, `0b01010101010101010101010101010101`, a);
25834	let e = _mm512_set_epi16(
25835	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25836	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25837	);
25838	assert_eq_m512i(r, e);
25839	}
25840
25841	#[simd_test(enable = "avx512fp16")]
25842	unsafe fn test_mm512_maskz_cvttph_epu16() {
25843	let a = _mm512_set_ph(
25844	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25845	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25846	`31.0`, `32.0`,
25847	);
25848	let r = _mm512_maskz_cvttph_epu16(`0b01010101010101010101010101010101`, a);
25849	let e = _mm512_set_epi16(
25850	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25851	`0`, `28`, `0`, `30`, `0`, `32`,
25852	);
25853	assert_eq_m512i(r, e);
25854	}
25855
25856	#[simd_test(enable = "avx512fp16")]
25857	unsafe fn test_mm512_cvtt_roundph_epu16() {
25858	let a = _mm512_set_ph(
25859	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25860	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25861	`31.0`, `32.0`,
25862	);
25863	let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
25864	let e = _mm512_set_epi16(
25865	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`,
25866	`25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`,
25867	);
25868	assert_eq_m512i(r, e);
25869	}
25870
25871	#[simd_test(enable = "avx512fp16")]
25872	unsafe fn test_mm512_mask_cvtt_roundph_epu16() {
25873	let a = _mm512_set_ph(
25874	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25875	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25876	`31.0`, `32.0`,
25877	);
25878	let src = _mm512_set_epi16(
25879	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
25880	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`,
25881	);
25882	let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
25883	src,
25884	`0b01010101010101010101010101010101`,
25885	a,
25886	);
25887	let e = _mm512_set_epi16(
25888	`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`, `26`, `18`, `28`, `20`, `30`, `22`, `32`,
25889	`24`, `34`, `26`, `36`, `28`, `38`, `30`, `40`, `32`,
25890	);
25891	assert_eq_m512i(r, e);
25892	}
25893
25894	#[simd_test(enable = "avx512fp16")]
25895	unsafe fn test_mm512_maskz_cvtt_roundph_epu16() {
25896	let a = _mm512_set_ph(
25897	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25898	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
25899	`31.0`, `32.0`,
25900	);
25901	let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
25902	`0b01010101010101010101010101010101`,
25903	a,
25904	);
25905	let e = _mm512_set_epi16(
25906	`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`, `0`, `18`, `0`, `20`, `0`, `22`, `0`, `24`, `0`, `26`,
25907	`0`, `28`, `0`, `30`, `0`, `32`,
25908	);
25909	assert_eq_m512i(r, e);
25910	}
25911
25912	#[simd_test(enable = "avx512fp16,avx512vl")]
25913	unsafe fn test_mm_cvtph_epi32() {
25914	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
25915	let r = _mm_cvtph_epi32(a);
25916	let e = _mm_set_epi32(`1`, `2`, `3`, `4`);
25917	assert_eq_m128i(r, e);
25918	}
25919
25920	#[simd_test(enable = "avx512fp16,avx512vl")]
25921	unsafe fn test_mm_mask_cvtph_epi32() {
25922	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
25923	let src = _mm_set_epi32(`10`, `11`, `12`, `13`);
25924	let r = _mm_mask_cvtph_epi32(src, `0b0101`, a);
25925	let e = _mm_set_epi32(`10`, `2`, `12`, `4`);
25926	assert_eq_m128i(r, e);
25927	}
25928
25929	#[simd_test(enable = "avx512fp16,avx512vl")]
25930	unsafe fn test_mm_maskz_cvtph_epi32() {
25931	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
25932	let r = _mm_maskz_cvtph_epi32(`0b0101`, a);
25933	let e = _mm_set_epi32(`0`, `2`, `0`, `4`);
25934	assert_eq_m128i(r, e);
25935	}
25936
25937	#[simd_test(enable = "avx512fp16,avx512vl")]
25938	unsafe fn test_mm256_cvtph_epi32() {
25939	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25940	let r = _mm256_cvtph_epi32(a);
25941	let e = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
25942	assert_eq_m256i(r, e);
25943	}
25944
25945	#[simd_test(enable = "avx512fp16,avx512vl")]
25946	unsafe fn test_mm256_mask_cvtph_epi32() {
25947	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25948	let src = _mm256_set_epi32(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
25949	let r = _mm256_mask_cvtph_epi32(src, `0b01010101`, a);
25950	let e = _mm256_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
25951	assert_eq_m256i(r, e);
25952	}
25953
25954	#[simd_test(enable = "avx512fp16,avx512vl")]
25955	unsafe fn test_mm256_maskz_cvtph_epi32() {
25956	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
25957	let r = _mm256_maskz_cvtph_epi32(`0b01010101`, a);
25958	let e = _mm256_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
25959	assert_eq_m256i(r, e);
25960	}
25961
25962	#[simd_test(enable = "avx512fp16")]
25963	unsafe fn test_mm512_cvtph_epi32() {
25964	let a = _mm256_set_ph(
25965	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25966	);
25967	let r = _mm512_cvtph_epi32(a);
25968	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
25969	assert_eq_m512i(r, e);
25970	}
25971
25972	#[simd_test(enable = "avx512fp16")]
25973	unsafe fn test_mm512_mask_cvtph_epi32() {
25974	let a = _mm256_set_ph(
25975	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25976	);
25977	let src = _mm512_set_epi32(
25978	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
25979	);
25980	let r = _mm512_mask_cvtph_epi32(src, `0b0101010101010101`, a);
25981	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
25982	assert_eq_m512i(r, e);
25983	}
25984
25985	#[simd_test(enable = "avx512fp16")]
25986	unsafe fn test_mm512_maskz_cvtph_epi32() {
25987	let a = _mm256_set_ph(
25988	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25989	);
25990	let r = _mm512_maskz_cvtph_epi32(`0b0101010101010101`, a);
25991	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
25992	assert_eq_m512i(r, e);
25993	}
25994
25995	#[simd_test(enable = "avx512fp16")]
25996	unsafe fn test_mm512_cvt_roundph_epi32() {
25997	let a = _mm256_set_ph(
25998	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
25999	);
26000	let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
26001	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26002	assert_eq_m512i(r, e);
26003	}
26004
26005	#[simd_test(enable = "avx512fp16")]
26006	unsafe fn test_mm512_mask_cvt_roundph_epi32() {
26007	let a = _mm256_set_ph(
26008	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26009	);
26010	let src = _mm512_set_epi32(
26011	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26012	);
26013	let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26014	src,
26015	`0b0101010101010101`,
26016	a,
26017	);
26018	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26019	assert_eq_m512i(r, e);
26020	}
26021
26022	#[simd_test(enable = "avx512fp16")]
26023	unsafe fn test_mm512_maskz_cvt_roundph_epi32() {
26024	let a = _mm256_set_ph(
26025	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26026	);
26027	let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26028	`0b0101010101010101`,
26029	a,
26030	);
26031	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26032	assert_eq_m512i(r, e);
26033	}
26034
26035	#[simd_test(enable = "avx512fp16")]
26036	unsafe fn test_mm_cvtsh_i32() {
26037	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26038	let r = _mm_cvtsh_i32(a);
26039	assert_eq!(r, `1`);
26040	}
26041
26042	#[simd_test(enable = "avx512fp16")]
26043	unsafe fn test_mm_cvt_roundsh_i32() {
26044	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26045	let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
26046	assert_eq!(r, `1`);
26047	}
26048
26049	#[simd_test(enable = "avx512fp16,avx512vl")]
26050	unsafe fn test_mm_cvtph_epu32() {
26051	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26052	let r = _mm_cvtph_epu32(a);
26053	let e = _mm_set_epi32(`1`, `2`, `3`, `4`);
26054	assert_eq_m128i(r, e);
26055	}
26056
26057	#[simd_test(enable = "avx512fp16,avx512vl")]
26058	unsafe fn test_mm_mask_cvtph_epu32() {
26059	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26060	let src = _mm_set_epi32(`10`, `11`, `12`, `13`);
26061	let r = _mm_mask_cvtph_epu32(src, `0b0101`, a);
26062	let e = _mm_set_epi32(`10`, `2`, `12`, `4`);
26063	assert_eq_m128i(r, e);
26064	}
26065
26066	#[simd_test(enable = "avx512fp16,avx512vl")]
26067	unsafe fn test_mm_maskz_cvtph_epu32() {
26068	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26069	let r = _mm_maskz_cvtph_epu32(`0b0101`, a);
26070	let e = _mm_set_epi32(`0`, `2`, `0`, `4`);
26071	assert_eq_m128i(r, e);
26072	}
26073
26074	#[simd_test(enable = "avx512fp16,avx512vl")]
26075	unsafe fn test_mm256_cvtph_epu32() {
26076	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26077	let r = _mm256_cvtph_epu32(a);
26078	let e = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26079	assert_eq_m256i(r, e);
26080	}
26081
26082	#[simd_test(enable = "avx512fp16,avx512vl")]
26083	unsafe fn test_mm256_mask_cvtph_epu32() {
26084	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26085	let src = _mm256_set_epi32(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
26086	let r = _mm256_mask_cvtph_epu32(src, `0b01010101`, a);
26087	let e = _mm256_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
26088	assert_eq_m256i(r, e);
26089	}
26090
26091	#[simd_test(enable = "avx512fp16,avx512vl")]
26092	unsafe fn test_mm256_maskz_cvtph_epu32() {
26093	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26094	let r = _mm256_maskz_cvtph_epu32(`0b01010101`, a);
26095	let e = _mm256_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26096	assert_eq_m256i(r, e);
26097	}
26098
26099	#[simd_test(enable = "avx512fp16")]
26100	unsafe fn test_mm512_cvtph_epu32() {
26101	let a = _mm256_set_ph(
26102	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26103	);
26104	let r = _mm512_cvtph_epu32(a);
26105	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26106	assert_eq_m512i(r, e);
26107	}
26108
26109	#[simd_test(enable = "avx512fp16")]
26110	unsafe fn test_mm512_mask_cvtph_epu32() {
26111	let a = _mm256_set_ph(
26112	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26113	);
26114	let src = _mm512_set_epi32(
26115	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26116	);
26117	let r = _mm512_mask_cvtph_epu32(src, `0b0101010101010101`, a);
26118	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26119	assert_eq_m512i(r, e);
26120	}
26121
26122	#[simd_test(enable = "avx512fp16")]
26123	unsafe fn test_mm512_maskz_cvtph_epu32() {
26124	let a = _mm256_set_ph(
26125	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26126	);
26127	let r = _mm512_maskz_cvtph_epu32(`0b0101010101010101`, a);
26128	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26129	assert_eq_m512i(r, e);
26130	}
26131
26132	#[simd_test(enable = "avx512fp16")]
26133	unsafe fn test_mm512_cvt_roundph_epu32() {
26134	let a = _mm256_set_ph(
26135	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26136	);
26137	let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
26138	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26139	assert_eq_m512i(r, e);
26140	}
26141
26142	#[simd_test(enable = "avx512fp16")]
26143	unsafe fn test_mm512_mask_cvt_roundph_epu32() {
26144	let a = _mm256_set_ph(
26145	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26146	);
26147	let src = _mm512_set_epi32(
26148	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26149	);
26150	let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26151	src,
26152	`0b0101010101010101`,
26153	a,
26154	);
26155	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26156	assert_eq_m512i(r, e);
26157	}
26158
26159	#[simd_test(enable = "avx512fp16")]
26160	unsafe fn test_mm512_maskz_cvt_roundph_epu32() {
26161	let a = _mm256_set_ph(
26162	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26163	);
26164	let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26165	`0b0101010101010101`,
26166	a,
26167	);
26168	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26169	assert_eq_m512i(r, e);
26170	}
26171
26172	#[simd_test(enable = "avx512fp16")]
26173	unsafe fn test_mm_cvtsh_u32() {
26174	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26175	let r = _mm_cvtsh_u32(a);
26176	assert_eq!(r, `1`);
26177	}
26178
26179	#[simd_test(enable = "avx512fp16")]
26180	unsafe fn test_mm_cvt_roundsh_u32() {
26181	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26182	let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
26183	assert_eq!(r, `1`);
26184	}
26185
26186	#[simd_test(enable = "avx512fp16,avx512vl")]
26187	unsafe fn test_mm_cvttph_epi32() {
26188	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26189	let r = _mm_cvttph_epi32(a);
26190	let e = _mm_set_epi32(`1`, `2`, `3`, `4`);
26191	assert_eq_m128i(r, e);
26192	}
26193
26194	#[simd_test(enable = "avx512fp16,avx512vl")]
26195	unsafe fn test_mm_mask_cvttph_epi32() {
26196	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26197	let src = _mm_set_epi32(`10`, `11`, `12`, `13`);
26198	let r = _mm_mask_cvttph_epi32(src, `0b0101`, a);
26199	let e = _mm_set_epi32(`10`, `2`, `12`, `4`);
26200	assert_eq_m128i(r, e);
26201	}
26202
26203	#[simd_test(enable = "avx512fp16,avx512vl")]
26204	unsafe fn test_mm_maskz_cvttph_epi32() {
26205	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26206	let r = _mm_maskz_cvttph_epi32(`0b0101`, a);
26207	let e = _mm_set_epi32(`0`, `2`, `0`, `4`);
26208	assert_eq_m128i(r, e);
26209	}
26210
26211	#[simd_test(enable = "avx512fp16,avx512vl")]
26212	unsafe fn test_mm256_cvttph_epi32() {
26213	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26214	let r = _mm256_cvttph_epi32(a);
26215	let e = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26216	assert_eq_m256i(r, e);
26217	}
26218
26219	#[simd_test(enable = "avx512fp16,avx512vl")]
26220	unsafe fn test_mm256_mask_cvttph_epi32() {
26221	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26222	let src = _mm256_set_epi32(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
26223	let r = _mm256_mask_cvttph_epi32(src, `0b01010101`, a);
26224	let e = _mm256_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
26225	assert_eq_m256i(r, e);
26226	}
26227
26228	#[simd_test(enable = "avx512fp16,avx512vl")]
26229	unsafe fn test_mm256_maskz_cvttph_epi32() {
26230	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26231	let r = _mm256_maskz_cvttph_epi32(`0b01010101`, a);
26232	let e = _mm256_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26233	assert_eq_m256i(r, e);
26234	}
26235
26236	#[simd_test(enable = "avx512fp16")]
26237	unsafe fn test_mm512_cvttph_epi32() {
26238	let a = _mm256_set_ph(
26239	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26240	);
26241	let r = _mm512_cvttph_epi32(a);
26242	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26243	assert_eq_m512i(r, e);
26244	}
26245
26246	#[simd_test(enable = "avx512fp16")]
26247	unsafe fn test_mm512_mask_cvttph_epi32() {
26248	let a = _mm256_set_ph(
26249	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26250	);
26251	let src = _mm512_set_epi32(
26252	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26253	);
26254	let r = _mm512_mask_cvttph_epi32(src, `0b0101010101010101`, a);
26255	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26256	assert_eq_m512i(r, e);
26257	}
26258
26259	#[simd_test(enable = "avx512fp16")]
26260	unsafe fn test_mm512_maskz_cvttph_epi32() {
26261	let a = _mm256_set_ph(
26262	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26263	);
26264	let r = _mm512_maskz_cvttph_epi32(`0b0101010101010101`, a);
26265	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26266	assert_eq_m512i(r, e);
26267	}
26268
26269	#[simd_test(enable = "avx512fp16")]
26270	unsafe fn test_mm512_cvtt_roundph_epi32() {
26271	let a = _mm256_set_ph(
26272	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26273	);
26274	let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
26275	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26276	assert_eq_m512i(r, e);
26277	}
26278
26279	#[simd_test(enable = "avx512fp16")]
26280	unsafe fn test_mm512_mask_cvtt_roundph_epi32() {
26281	let a = _mm256_set_ph(
26282	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26283	);
26284	let src = _mm512_set_epi32(
26285	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26286	);
26287	let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, `0b0101010101010101`, a);
26288	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26289	assert_eq_m512i(r, e);
26290	}
26291
26292	#[simd_test(enable = "avx512fp16")]
26293	unsafe fn test_mm512_maskz_cvtt_roundph_epi32() {
26294	let a = _mm256_set_ph(
26295	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26296	);
26297	let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(`0b0101010101010101`, a);
26298	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26299	assert_eq_m512i(r, e);
26300	}
26301
26302	#[simd_test(enable = "avx512fp16")]
26303	unsafe fn test_mm_cvttsh_i32() {
26304	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26305	let r = _mm_cvttsh_i32(a);
26306	assert_eq!(r, `1`);
26307	}
26308
26309	#[simd_test(enable = "avx512fp16")]
26310	unsafe fn test_mm_cvtt_roundsh_i32() {
26311	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26312	let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
26313	assert_eq!(r, `1`);
26314	}
26315
26316	#[simd_test(enable = "avx512fp16,avx512vl")]
26317	unsafe fn test_mm_cvttph_epu32() {
26318	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26319	let r = _mm_cvttph_epu32(a);
26320	let e = _mm_set_epi32(`1`, `2`, `3`, `4`);
26321	assert_eq_m128i(r, e);
26322	}
26323
26324	#[simd_test(enable = "avx512fp16,avx512vl")]
26325	unsafe fn test_mm_mask_cvttph_epu32() {
26326	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26327	let src = _mm_set_epi32(`10`, `11`, `12`, `13`);
26328	let r = _mm_mask_cvttph_epu32(src, `0b0101`, a);
26329	let e = _mm_set_epi32(`10`, `2`, `12`, `4`);
26330	assert_eq_m128i(r, e);
26331	}
26332
26333	#[simd_test(enable = "avx512fp16,avx512vl")]
26334	unsafe fn test_mm_maskz_cvttph_epu32() {
26335	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26336	let r = _mm_maskz_cvttph_epu32(`0b0101`, a);
26337	let e = _mm_set_epi32(`0`, `2`, `0`, `4`);
26338	assert_eq_m128i(r, e);
26339	}
26340
26341	#[simd_test(enable = "avx512fp16,avx512vl")]
26342	unsafe fn test_mm256_cvttph_epu32() {
26343	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26344	let r = _mm256_cvttph_epu32(a);
26345	let e = _mm256_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26346	assert_eq_m256i(r, e);
26347	}
26348
26349	#[simd_test(enable = "avx512fp16,avx512vl")]
26350	unsafe fn test_mm256_mask_cvttph_epu32() {
26351	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26352	let src = _mm256_set_epi32(`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`);
26353	let r = _mm256_mask_cvttph_epu32(src, `0b01010101`, a);
26354	let e = _mm256_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`);
26355	assert_eq_m256i(r, e);
26356	}
26357
26358	#[simd_test(enable = "avx512fp16,avx512vl")]
26359	unsafe fn test_mm256_maskz_cvttph_epu32() {
26360	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26361	let r = _mm256_maskz_cvttph_epu32(`0b01010101`, a);
26362	let e = _mm256_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26363	assert_eq_m256i(r, e);
26364	}
26365
26366	#[simd_test(enable = "avx512fp16")]
26367	unsafe fn test_mm512_cvttph_epu32() {
26368	let a = _mm256_set_ph(
26369	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26370	);
26371	let r = _mm512_cvttph_epu32(a);
26372	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26373	assert_eq_m512i(r, e);
26374	}
26375
26376	#[simd_test(enable = "avx512fp16")]
26377	unsafe fn test_mm512_mask_cvttph_epu32() {
26378	let a = _mm256_set_ph(
26379	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26380	);
26381	let src = _mm512_set_epi32(
26382	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26383	);
26384	let r = _mm512_mask_cvttph_epu32(src, `0b0101010101010101`, a);
26385	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26386	assert_eq_m512i(r, e);
26387	}
26388
26389	#[simd_test(enable = "avx512fp16")]
26390	unsafe fn test_mm512_maskz_cvttph_epu32() {
26391	let a = _mm256_set_ph(
26392	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26393	);
26394	let r = _mm512_maskz_cvttph_epu32(`0b0101010101010101`, a);
26395	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26396	assert_eq_m512i(r, e);
26397	}
26398
26399	#[simd_test(enable = "avx512fp16")]
26400	unsafe fn test_mm512_cvtt_roundph_epu32() {
26401	let a = _mm256_set_ph(
26402	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26403	);
26404	let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a);
26405	let e = _mm512_set_epi32(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26406	assert_eq_m512i(r, e);
26407	}
26408
26409	#[simd_test(enable = "avx512fp16")]
26410	unsafe fn test_mm512_mask_cvtt_roundph_epu32() {
26411	let a = _mm256_set_ph(
26412	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26413	);
26414	let src = _mm512_set_epi32(
26415	`10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`,
26416	);
26417	let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, `0b0101010101010101`, a);
26418	let e = _mm512_set_epi32(`10`, `2`, `12`, `4`, `14`, `6`, `16`, `8`, `18`, `10`, `20`, `12`, `22`, `14`, `24`, `16`);
26419	assert_eq_m512i(r, e);
26420	}
26421
26422	#[simd_test(enable = "avx512fp16")]
26423	unsafe fn test_mm512_maskz_cvtt_roundph_epu32() {
26424	let a = _mm256_set_ph(
26425	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26426	);
26427	let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(`0b0101010101010101`, a);
26428	let e = _mm512_set_epi32(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`, `0`, `10`, `0`, `12`, `0`, `14`, `0`, `16`);
26429	assert_eq_m512i(r, e);
26430	}
26431
26432	#[simd_test(enable = "avx512fp16")]
26433	unsafe fn test_mm_cvttsh_u32() {
26434	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26435	let r = _mm_cvttsh_u32(a);
26436	assert_eq!(r, `1`);
26437	}
26438
26439	#[simd_test(enable = "avx512fp16")]
26440	unsafe fn test_mm_cvtt_roundsh_u32() {
26441	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26442	let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
26443	assert_eq!(r, `1`);
26444	}
26445
26446	#[simd_test(enable = "avx512fp16,avx512vl")]
26447	unsafe fn test_mm_cvtph_epi64() {
26448	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26449	let r = _mm_cvtph_epi64(a);
26450	let e = _mm_set_epi64x(`1`, `2`);
26451	assert_eq_m128i(r, e);
26452	}
26453
26454	#[simd_test(enable = "avx512fp16,avx512vl")]
26455	unsafe fn test_mm_mask_cvtph_epi64() {
26456	let src = _mm_set_epi64x(`3`, `4`);
26457	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26458	let r = _mm_mask_cvtph_epi64(src, `0b01`, a);
26459	let e = _mm_set_epi64x(`3`, `2`);
26460	assert_eq_m128i(r, e);
26461	}
26462
26463	#[simd_test(enable = "avx512fp16,avx512vl")]
26464	unsafe fn test_mm_maskz_cvtph_epi64() {
26465	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26466	let r = _mm_maskz_cvtph_epi64(`0b01`, a);
26467	let e = _mm_set_epi64x(`0`, `2`);
26468	assert_eq_m128i(r, e);
26469	}
26470
26471	#[simd_test(enable = "avx512fp16,avx512vl")]
26472	unsafe fn test_mm256_cvtph_epi64() {
26473	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26474	let r = _mm256_cvtph_epi64(a);
26475	let e = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
26476	assert_eq_m256i(r, e);
26477	}
26478
26479	#[simd_test(enable = "avx512fp16,avx512vl")]
26480	unsafe fn test_mm256_mask_cvtph_epi64() {
26481	let src = _mm256_set_epi64x(`5`, `6`, `7`, `8`);
26482	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26483	let r = _mm256_mask_cvtph_epi64(src, `0b0101`, a);
26484	let e = _mm256_set_epi64x(`5`, `2`, `7`, `4`);
26485	assert_eq_m256i(r, e);
26486	}
26487
26488	#[simd_test(enable = "avx512fp16,avx512vl")]
26489	unsafe fn test_mm256_maskz_cvtph_epi64() {
26490	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26491	let r = _mm256_maskz_cvtph_epi64(`0b0101`, a);
26492	let e = _mm256_set_epi64x(`0`, `2`, `0`, `4`);
26493	assert_eq_m256i(r, e);
26494	}
26495
26496	#[simd_test(enable = "avx512fp16")]
26497	unsafe fn test_mm512_cvtph_epi64() {
26498	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26499	let r = _mm512_cvtph_epi64(a);
26500	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26501	assert_eq_m512i(r, e);
26502	}
26503
26504	#[simd_test(enable = "avx512fp16")]
26505	unsafe fn test_mm512_mask_cvtph_epi64() {
26506	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26507	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26508	let r = _mm512_mask_cvtph_epi64(src, `0b01010101`, a);
26509	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26510	assert_eq_m512i(r, e);
26511	}
26512
26513	#[simd_test(enable = "avx512fp16")]
26514	unsafe fn test_mm512_maskz_cvtph_epi64() {
26515	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26516	let r = _mm512_maskz_cvtph_epi64(`0b01010101`, a);
26517	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26518	assert_eq_m512i(r, e);
26519	}
26520
26521	#[simd_test(enable = "avx512fp16")]
26522	unsafe fn test_mm512_cvt_roundph_epi64() {
26523	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26524	let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
26525	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26526	assert_eq_m512i(r, e);
26527	}
26528
26529	#[simd_test(enable = "avx512fp16")]
26530	unsafe fn test_mm512_mask_cvt_roundph_epi64() {
26531	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26532	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26533	let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26534	src, `0b01010101`, a,
26535	);
26536	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26537	assert_eq_m512i(r, e);
26538	}
26539
26540	#[simd_test(enable = "avx512fp16")]
26541	unsafe fn test_mm512_maskz_cvt_roundph_epi64() {
26542	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26543	let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26544	`0b01010101`, a,
26545	);
26546	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26547	assert_eq_m512i(r, e);
26548	}
26549
26550	#[simd_test(enable = "avx512fp16,avx512vl")]
26551	unsafe fn test_mm_cvtph_epu64() {
26552	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26553	let r = _mm_cvtph_epu64(a);
26554	let e = _mm_set_epi64x(`1`, `2`);
26555	assert_eq_m128i(r, e);
26556	}
26557
26558	#[simd_test(enable = "avx512fp16,avx512vl")]
26559	unsafe fn test_mm_mask_cvtph_epu64() {
26560	let src = _mm_set_epi64x(`3`, `4`);
26561	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26562	let r = _mm_mask_cvtph_epu64(src, `0b01`, a);
26563	let e = _mm_set_epi64x(`3`, `2`);
26564	assert_eq_m128i(r, e);
26565	}
26566
26567	#[simd_test(enable = "avx512fp16,avx512vl")]
26568	unsafe fn test_mm_maskz_cvtph_epu64() {
26569	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26570	let r = _mm_maskz_cvtph_epu64(`0b01`, a);
26571	let e = _mm_set_epi64x(`0`, `2`);
26572	assert_eq_m128i(r, e);
26573	}
26574
26575	#[simd_test(enable = "avx512fp16,avx512vl")]
26576	unsafe fn test_mm256_cvtph_epu64() {
26577	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26578	let r = _mm256_cvtph_epu64(a);
26579	let e = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
26580	assert_eq_m256i(r, e);
26581	}
26582
26583	#[simd_test(enable = "avx512fp16,avx512vl")]
26584	unsafe fn test_mm256_mask_cvtph_epu64() {
26585	let src = _mm256_set_epi64x(`5`, `6`, `7`, `8`);
26586	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26587	let r = _mm256_mask_cvtph_epu64(src, `0b0101`, a);
26588	let e = _mm256_set_epi64x(`5`, `2`, `7`, `4`);
26589	assert_eq_m256i(r, e);
26590	}
26591
26592	#[simd_test(enable = "avx512fp16,avx512vl")]
26593	unsafe fn test_mm256_maskz_cvtph_epu64() {
26594	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26595	let r = _mm256_maskz_cvtph_epu64(`0b0101`, a);
26596	let e = _mm256_set_epi64x(`0`, `2`, `0`, `4`);
26597	assert_eq_m256i(r, e);
26598	}
26599
26600	#[simd_test(enable = "avx512fp16")]
26601	unsafe fn test_mm512_cvtph_epu64() {
26602	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26603	let r = _mm512_cvtph_epu64(a);
26604	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26605	assert_eq_m512i(r, e);
26606	}
26607
26608	#[simd_test(enable = "avx512fp16")]
26609	unsafe fn test_mm512_mask_cvtph_epu64() {
26610	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26611	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26612	let r = _mm512_mask_cvtph_epu64(src, `0b01010101`, a);
26613	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26614	assert_eq_m512i(r, e);
26615	}
26616
26617	#[simd_test(enable = "avx512fp16")]
26618	unsafe fn test_mm512_maskz_cvtph_epu64() {
26619	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26620	let r = _mm512_maskz_cvtph_epu64(`0b01010101`, a);
26621	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26622	assert_eq_m512i(r, e);
26623	}
26624
26625	#[simd_test(enable = "avx512fp16")]
26626	unsafe fn test_mm512_cvt_roundph_epu64() {
26627	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26628	let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(a);
26629	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26630	assert_eq_m512i(r, e);
26631	}
26632
26633	#[simd_test(enable = "avx512fp16")]
26634	unsafe fn test_mm512_mask_cvt_roundph_epu64() {
26635	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26636	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26637	let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26638	src, `0b01010101`, a,
26639	);
26640	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26641	assert_eq_m512i(r, e);
26642	}
26643
26644	#[simd_test(enable = "avx512fp16")]
26645	unsafe fn test_mm512_maskz_cvt_roundph_epu64() {
26646	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26647	let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC }>(
26648	`0b01010101`, a,
26649	);
26650	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26651	assert_eq_m512i(r, e);
26652	}
26653
26654	#[simd_test(enable = "avx512fp16,avx512vl")]
26655	unsafe fn test_mm_cvttph_epi64() {
26656	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26657	let r = _mm_cvttph_epi64(a);
26658	let e = _mm_set_epi64x(`1`, `2`);
26659	assert_eq_m128i(r, e);
26660	}
26661
26662	#[simd_test(enable = "avx512fp16,avx512vl")]
26663	unsafe fn test_mm_mask_cvttph_epi64() {
26664	let src = _mm_set_epi64x(`3`, `4`);
26665	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26666	let r = _mm_mask_cvttph_epi64(src, `0b01`, a);
26667	let e = _mm_set_epi64x(`3`, `2`);
26668	assert_eq_m128i(r, e);
26669	}
26670
26671	#[simd_test(enable = "avx512fp16,avx512vl")]
26672	unsafe fn test_mm_maskz_cvttph_epi64() {
26673	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26674	let r = _mm_maskz_cvttph_epi64(`0b01`, a);
26675	let e = _mm_set_epi64x(`0`, `2`);
26676	assert_eq_m128i(r, e);
26677	}
26678
26679	#[simd_test(enable = "avx512fp16,avx512vl")]
26680	unsafe fn test_mm256_cvttph_epi64() {
26681	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26682	let r = _mm256_cvttph_epi64(a);
26683	let e = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
26684	assert_eq_m256i(r, e);
26685	}
26686
26687	#[simd_test(enable = "avx512fp16,avx512vl")]
26688	unsafe fn test_mm256_mask_cvttph_epi64() {
26689	let src = _mm256_set_epi64x(`5`, `6`, `7`, `8`);
26690	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26691	let r = _mm256_mask_cvttph_epi64(src, `0b0101`, a);
26692	let e = _mm256_set_epi64x(`5`, `2`, `7`, `4`);
26693	assert_eq_m256i(r, e);
26694	}
26695
26696	#[simd_test(enable = "avx512fp16,avx512vl")]
26697	unsafe fn test_mm256_maskz_cvttph_epi64() {
26698	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26699	let r = _mm256_maskz_cvttph_epi64(`0b0101`, a);
26700	let e = _mm256_set_epi64x(`0`, `2`, `0`, `4`);
26701	assert_eq_m256i(r, e);
26702	}
26703
26704	#[simd_test(enable = "avx512fp16")]
26705	unsafe fn test_mm512_cvttph_epi64() {
26706	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26707	let r = _mm512_cvttph_epi64(a);
26708	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26709	assert_eq_m512i(r, e);
26710	}
26711
26712	#[simd_test(enable = "avx512fp16")]
26713	unsafe fn test_mm512_mask_cvttph_epi64() {
26714	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26715	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26716	let r = _mm512_mask_cvttph_epi64(src, `0b01010101`, a);
26717	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26718	assert_eq_m512i(r, e);
26719	}
26720
26721	#[simd_test(enable = "avx512fp16")]
26722	unsafe fn test_mm512_maskz_cvttph_epi64() {
26723	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26724	let r = _mm512_maskz_cvttph_epi64(`0b01010101`, a);
26725	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26726	assert_eq_m512i(r, e);
26727	}
26728
26729	#[simd_test(enable = "avx512fp16")]
26730	unsafe fn test_mm512_cvtt_roundph_epi64() {
26731	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26732	let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
26733	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26734	assert_eq_m512i(r, e);
26735	}
26736
26737	#[simd_test(enable = "avx512fp16")]
26738	unsafe fn test_mm512_mask_cvtt_roundph_epi64() {
26739	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26740	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26741	let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, `0b01010101`, a);
26742	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26743	assert_eq_m512i(r, e);
26744	}
26745
26746	#[simd_test(enable = "avx512fp16")]
26747	unsafe fn test_mm512_maskz_cvtt_roundph_epi64() {
26748	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26749	let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(`0b01010101`, a);
26750	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26751	assert_eq_m512i(r, e);
26752	}
26753
26754	#[simd_test(enable = "avx512fp16,avx512vl")]
26755	unsafe fn test_mm_cvttph_epu64() {
26756	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26757	let r = _mm_cvttph_epu64(a);
26758	let e = _mm_set_epi64x(`1`, `2`);
26759	assert_eq_m128i(r, e);
26760	}
26761
26762	#[simd_test(enable = "avx512fp16,avx512vl")]
26763	unsafe fn test_mm_mask_cvttph_epu64() {
26764	let src = _mm_set_epi64x(`3`, `4`);
26765	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26766	let r = _mm_mask_cvttph_epu64(src, `0b01`, a);
26767	let e = _mm_set_epi64x(`3`, `2`);
26768	assert_eq_m128i(r, e);
26769	}
26770
26771	#[simd_test(enable = "avx512fp16,avx512vl")]
26772	unsafe fn test_mm_maskz_cvttph_epu64() {
26773	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
26774	let r = _mm_maskz_cvttph_epu64(`0b01`, a);
26775	let e = _mm_set_epi64x(`0`, `2`);
26776	assert_eq_m128i(r, e);
26777	}
26778
26779	#[simd_test(enable = "avx512fp16,avx512vl")]
26780	unsafe fn test_mm256_cvttph_epu64() {
26781	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26782	let r = _mm256_cvttph_epu64(a);
26783	let e = _mm256_set_epi64x(`1`, `2`, `3`, `4`);
26784	assert_eq_m256i(r, e);
26785	}
26786
26787	#[simd_test(enable = "avx512fp16,avx512vl")]
26788	unsafe fn test_mm256_mask_cvttph_epu64() {
26789	let src = _mm256_set_epi64x(`5`, `6`, `7`, `8`);
26790	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26791	let r = _mm256_mask_cvttph_epu64(src, `0b0101`, a);
26792	let e = _mm256_set_epi64x(`5`, `2`, `7`, `4`);
26793	assert_eq_m256i(r, e);
26794	}
26795
26796	#[simd_test(enable = "avx512fp16,avx512vl")]
26797	unsafe fn test_mm256_maskz_cvttph_epu64() {
26798	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26799	let r = _mm256_maskz_cvttph_epu64(`0b0101`, a);
26800	let e = _mm256_set_epi64x(`0`, `2`, `0`, `4`);
26801	assert_eq_m256i(r, e);
26802	}
26803
26804	#[simd_test(enable = "avx512fp16")]
26805	unsafe fn test_mm512_cvttph_epu64() {
26806	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26807	let r = _mm512_cvttph_epu64(a);
26808	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26809	assert_eq_m512i(r, e);
26810	}
26811
26812	#[simd_test(enable = "avx512fp16")]
26813	unsafe fn test_mm512_mask_cvttph_epu64() {
26814	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26815	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26816	let r = _mm512_mask_cvttph_epu64(src, `0b01010101`, a);
26817	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26818	assert_eq_m512i(r, e);
26819	}
26820
26821	#[simd_test(enable = "avx512fp16")]
26822	unsafe fn test_mm512_maskz_cvttph_epu64() {
26823	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26824	let r = _mm512_maskz_cvttph_epu64(`0b01010101`, a);
26825	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26826	assert_eq_m512i(r, e);
26827	}
26828
26829	#[simd_test(enable = "avx512fp16")]
26830	unsafe fn test_mm512_cvtt_roundph_epu64() {
26831	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26832	let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
26833	let e = _mm512_set_epi64(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
26834	assert_eq_m512i(r, e);
26835	}
26836
26837	#[simd_test(enable = "avx512fp16")]
26838	unsafe fn test_mm512_mask_cvtt_roundph_epu64() {
26839	let src = _mm512_set_epi64(`9`, `10`, `11`, `12`, `13`, `14`, `15`, `16`);
26840	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26841	let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, `0b01010101`, a);
26842	let e = _mm512_set_epi64(`9`, `2`, `11`, `4`, `13`, `6`, `15`, `8`);
26843	assert_eq_m512i(r, e);
26844	}
26845
26846	#[simd_test(enable = "avx512fp16")]
26847	unsafe fn test_mm512_maskz_cvtt_roundph_epu64() {
26848	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26849	let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(`0b01010101`, a);
26850	let e = _mm512_set_epi64(`0`, `2`, `0`, `4`, `0`, `6`, `0`, `8`);
26851	assert_eq_m512i(r, e);
26852	}
26853
26854	#[simd_test(enable = "avx512fp16,avx512vl")]
26855	unsafe fn test_mm_cvtxph_ps() {
26856	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26857	let r = _mm_cvtxph_ps(a);
26858	let e = _mm_set_ps(`1.0`, `2.0`, `3.0`, `4.0`);
26859	assert_eq_m128(r, e);
26860	}
26861
26862	#[simd_test(enable = "avx512fp16,avx512vl")]
26863	unsafe fn test_mm_mask_cvtxph_ps() {
26864	let src = _mm_set_ps(`10.0`, `11.0`, `12.0`, `13.0`);
26865	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26866	let r = _mm_mask_cvtxph_ps(src, `0b0101`, a);
26867	let e = _mm_set_ps(`10.0`, `2.0`, `12.0`, `4.0`);
26868	assert_eq_m128(r, e);
26869	}
26870
26871	#[simd_test(enable = "avx512fp16,avx512vl")]
26872	unsafe fn test_mm_maskz_cvtxph_ps() {
26873	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
26874	let r = _mm_maskz_cvtxph_ps(`0b0101`, a);
26875	let e = _mm_set_ps(`0.0`, `2.0`, `0.0`, `4.0`);
26876	assert_eq_m128(r, e);
26877	}
26878
26879	#[simd_test(enable = "avx512fp16,avx512vl")]
26880	unsafe fn test_mm256_cvtxph_ps() {
26881	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26882	let r = _mm256_cvtxph_ps(a);
26883	let e = _mm256_set_ps(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26884	assert_eq_m256(r, e);
26885	}
26886
26887	#[simd_test(enable = "avx512fp16,avx512vl")]
26888	unsafe fn test_mm256_mask_cvtxph_ps() {
26889	let src = _mm256_set_ps(`10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`);
26890	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26891	let r = _mm256_mask_cvtxph_ps(src, `0b01010101`, a);
26892	let e = _mm256_set_ps(`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`);
26893	assert_eq_m256(r, e);
26894	}
26895
26896	#[simd_test(enable = "avx512fp16,avx512vl")]
26897	unsafe fn test_mm256_maskz_cvtxph_ps() {
26898	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
26899	let r = _mm256_maskz_cvtxph_ps(`0b01010101`, a);
26900	let e = _mm256_set_ps(`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`);
26901	assert_eq_m256(r, e);
26902	}
26903
26904	#[simd_test(enable = "avx512fp16")]
26905	unsafe fn test_mm512_cvtxph_ps() {
26906	let a = _mm256_set_ph(
26907	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26908	);
26909	let r = _mm512_cvtxph_ps(a);
26910	let e = _mm512_set_ps(
26911	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26912	);
26913	assert_eq_m512(r, e);
26914	}
26915
26916	#[simd_test(enable = "avx512fp16")]
26917	unsafe fn test_mm512_mask_cvtxph_ps() {
26918	let src = _mm512_set_ps(
26919	`10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`,
26920	`24.0`, `25.0`,
26921	);
26922	let a = _mm256_set_ph(
26923	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26924	);
26925	let r = _mm512_mask_cvtxph_ps(src, `0b0101010101010101`, a);
26926	let e = _mm512_set_ps(
26927	`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`, `18.0`, `10.0`, `20.0`, `12.0`, `22.0`, `14.0`, `24.0`,
26928	`16.0`,
26929	);
26930	assert_eq_m512(r, e);
26931	}
26932
26933	#[simd_test(enable = "avx512fp16")]
26934	unsafe fn test_mm512_maskz_cvtxph_ps() {
26935	let a = _mm256_set_ph(
26936	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26937	);
26938	let r = _mm512_maskz_cvtxph_ps(`0b0101010101010101`, a);
26939	let e = _mm512_set_ps(
26940	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
26941	);
26942	assert_eq_m512(r, e);
26943	}
26944
26945	#[simd_test(enable = "avx512fp16")]
26946	unsafe fn test_mm512_cvtx_roundph_ps() {
26947	let a = _mm256_set_ph(
26948	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26949	);
26950	let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
26951	let e = _mm512_set_ps(
26952	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26953	);
26954	assert_eq_m512(r, e);
26955	}
26956
26957	#[simd_test(enable = "avx512fp16")]
26958	unsafe fn test_mm512_mask_cvtx_roundph_ps() {
26959	let src = _mm512_set_ps(
26960	`10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`,
26961	`24.0`, `25.0`,
26962	);
26963	let a = _mm256_set_ph(
26964	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26965	);
26966	let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, `0b0101010101010101`, a);
26967	let e = _mm512_set_ps(
26968	`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`, `18.0`, `10.0`, `20.0`, `12.0`, `22.0`, `14.0`, `24.0`,
26969	`16.0`,
26970	);
26971	assert_eq_m512(r, e);
26972	}
26973
26974	#[simd_test(enable = "avx512fp16")]
26975	unsafe fn test_mm512_maskz_cvtx_roundph_ps() {
26976	let a = _mm256_set_ph(
26977	`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
26978	);
26979	let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(`0b0101010101010101`, a);
26980	let e = _mm512_set_ps(
26981	`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`, `0.0`, `10.0`, `0.0`, `12.0`, `0.0`, `14.0`, `0.0`, `16.0`,
26982	);
26983	assert_eq_m512(r, e);
26984	}
26985
26986	#[simd_test(enable = "avx512fp16")]
26987	unsafe fn test_mm_cvtsh_ss() {
26988	let a = _mm_setr_ps(`2.0`, `20.0`, `21.0`, `22.0`);
26989	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
26990	let r = _mm_cvtsh_ss(a, b);
26991	let e = _mm_setr_ps(`1.0`, `20.0`, `21.0`, `22.0`);
26992	assert_eq_m128(r, e);
26993	}
26994
26995	#[simd_test(enable = "avx512fp16")]
26996	unsafe fn test_mm_mask_cvtsh_ss() {
26997	let src = _mm_setr_ps(`3.0`, `11.0`, `12.0`, `13.0`);
26998	let a = _mm_setr_ps(`2.0`, `20.0`, `21.0`, `22.0`);
26999	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27000	let r = _mm_mask_cvtsh_ss(src, `0`, a, b);
27001	let e = _mm_setr_ps(`3.0`, `20.0`, `21.0`, `22.0`);
27002	assert_eq_m128(r, e);
27003	let r = _mm_mask_cvtsh_ss(src, `1`, a, b);
27004	let e = _mm_setr_ps(`1.0`, `20.0`, `21.0`, `22.0`);
27005	assert_eq_m128(r, e);
27006	}
27007
27008	#[simd_test(enable = "avx512fp16")]
27009	unsafe fn test_mm_maskz_cvtsh_ss() {
27010	let a = _mm_setr_ps(`2.0`, `20.0`, `21.0`, `22.0`);
27011	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27012	let r = _mm_maskz_cvtsh_ss(`0`, a, b);
27013	let e = _mm_setr_ps(`0.0`, `20.0`, `21.0`, `22.0`);
27014	assert_eq_m128(r, e);
27015	let r = _mm_maskz_cvtsh_ss(`1`, a, b);
27016	let e = _mm_setr_ps(`1.0`, `20.0`, `21.0`, `22.0`);
27017	assert_eq_m128(r, e);
27018	}
27019
27020	#[simd_test(enable = "avx512fp16")]
27021	unsafe fn test_mm_cvt_roundsh_ss() {
27022	let a = _mm_setr_ps(`2.0`, `20.0`, `21.0`, `22.0`);
27023	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27024	let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
27025	let e = _mm_setr_ps(`1.0`, `20.0`, `21.0`, `22.0`);
27026	assert_eq_m128(r, e);
27027	}
27028
27029	#[simd_test(enable = "avx512fp16")]
27030	unsafe fn test_mm_mask_cvt_roundsh_ss() {
27031	let src = _mm_setr_ps(`3.0`, `11.0`, `12.0`, `13.0`);
27032	let a = _mm_setr_ps(`2.0`, `20.0`, `21.0`, `22.0`);
27033	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27034	let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, `0`, a, b);
27035	let e = _mm_setr_ps(`3.0`, `20.0`, `21.0`, `22.0`);
27036	assert_eq_m128(r, e);
27037	let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, `1`, a, b);
27038	let e = _mm_setr_ps(`1.0`, `20.0`, `21.0`, `22.0`);
27039	assert_eq_m128(r, e);
27040	}
27041
27042	#[simd_test(enable = "avx512fp16")]
27043	unsafe fn test_mm_maskz_cvt_roundsh_ss() {
27044	let a = _mm_setr_ps(`2.0`, `20.0`, `21.0`, `22.0`);
27045	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27046	let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(`0`, a, b);
27047	let e = _mm_setr_ps(`0.0`, `20.0`, `21.0`, `22.0`);
27048	assert_eq_m128(r, e);
27049	let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(`1`, a, b);
27050	let e = _mm_setr_ps(`1.0`, `20.0`, `21.0`, `22.0`);
27051	assert_eq_m128(r, e);
27052	}
27053
27054	#[simd_test(enable = "avx512fp16,avx512vl")]
27055	unsafe fn test_mm_cvtph_pd() {
27056	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
27057	let r = _mm_cvtph_pd(a);
27058	let e = _mm_set_pd(`1.0`, `2.0`);
27059	assert_eq_m128d(r, e);
27060	}
27061
27062	#[simd_test(enable = "avx512fp16,avx512vl")]
27063	unsafe fn test_mm_mask_cvtph_pd() {
27064	let src = _mm_set_pd(`10.0`, `11.0`);
27065	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
27066	let r = _mm_mask_cvtph_pd(src, `0b01`, a);
27067	let e = _mm_set_pd(`10.0`, `2.0`);
27068	assert_eq_m128d(r, e);
27069	}
27070
27071	#[simd_test(enable = "avx512fp16,avx512vl")]
27072	unsafe fn test_mm_maskz_cvtph_pd() {
27073	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`);
27074	let r = _mm_maskz_cvtph_pd(`0b01`, a);
27075	let e = _mm_set_pd(`0.0`, `2.0`);
27076	assert_eq_m128d(r, e);
27077	}
27078
27079	#[simd_test(enable = "avx512fp16,avx512vl")]
27080	unsafe fn test_mm256_cvtph_pd() {
27081	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
27082	let r = _mm256_cvtph_pd(a);
27083	let e = _mm256_set_pd(`1.0`, `2.0`, `3.0`, `4.0`);
27084	assert_eq_m256d(r, e);
27085	}
27086
27087	#[simd_test(enable = "avx512fp16,avx512vl")]
27088	unsafe fn test_mm256_mask_cvtph_pd() {
27089	let src = _mm256_set_pd(`10.0`, `11.0`, `12.0`, `13.0`);
27090	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
27091	let r = _mm256_mask_cvtph_pd(src, `0b0101`, a);
27092	let e = _mm256_set_pd(`10.0`, `2.0`, `12.0`, `4.0`);
27093	assert_eq_m256d(r, e);
27094	}
27095
27096	#[simd_test(enable = "avx512fp16,avx512vl")]
27097	unsafe fn test_mm256_maskz_cvtph_pd() {
27098	let a = _mm_set_ph(`0.0`, `0.0`, `0.0`, `0.0`, `1.0`, `2.0`, `3.0`, `4.0`);
27099	let r = _mm256_maskz_cvtph_pd(`0b0101`, a);
27100	let e = _mm256_set_pd(`0.0`, `2.0`, `0.0`, `4.0`);
27101	assert_eq_m256d(r, e);
27102	}
27103
27104	#[simd_test(enable = "avx512fp16")]
27105	unsafe fn test_mm512_cvtph_pd() {
27106	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27107	let r = _mm512_cvtph_pd(a);
27108	let e = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27109	assert_eq_m512d(r, e);
27110	}
27111
27112	#[simd_test(enable = "avx512fp16")]
27113	unsafe fn test_mm512_mask_cvtph_pd() {
27114	let src = _mm512_set_pd(`10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`);
27115	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27116	let r = _mm512_mask_cvtph_pd(src, `0b01010101`, a);
27117	let e = _mm512_set_pd(`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`);
27118	assert_eq_m512d(r, e);
27119	}
27120
27121	#[simd_test(enable = "avx512fp16")]
27122	unsafe fn test_mm512_maskz_cvtph_pd() {
27123	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27124	let r = _mm512_maskz_cvtph_pd(`0b01010101`, a);
27125	let e = _mm512_set_pd(`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`);
27126	assert_eq_m512d(r, e);
27127	}
27128
27129	#[simd_test(enable = "avx512fp16")]
27130	unsafe fn test_mm512_cvt_roundph_pd() {
27131	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27132	let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
27133	let e = _mm512_set_pd(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27134	assert_eq_m512d(r, e);
27135	}
27136
27137	#[simd_test(enable = "avx512fp16")]
27138	unsafe fn test_mm512_mask_cvt_roundph_pd() {
27139	let src = _mm512_set_pd(`10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`, `17.0`);
27140	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27141	let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, `0b01010101`, a);
27142	let e = _mm512_set_pd(`10.0`, `2.0`, `12.0`, `4.0`, `14.0`, `6.0`, `16.0`, `8.0`);
27143	assert_eq_m512d(r, e);
27144	}
27145
27146	#[simd_test(enable = "avx512fp16")]
27147	unsafe fn test_mm512_maskz_cvt_roundph_pd() {
27148	let a = _mm_set_ph(`1.0`, `2.0`, `3.0`, `4.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27149	let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(`0b01010101`, a);
27150	let e = _mm512_set_pd(`0.0`, `2.0`, `0.0`, `4.0`, `0.0`, `6.0`, `0.0`, `8.0`);
27151	assert_eq_m512d(r, e);
27152	}
27153
27154	#[simd_test(enable = "avx512fp16")]
27155	unsafe fn test_mm_cvtsh_sd() {
27156	let a = _mm_setr_pd(`2.0`, `20.0`);
27157	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27158	let r = _mm_cvtsh_sd(a, b);
27159	let e = _mm_setr_pd(`1.0`, `20.0`);
27160	assert_eq_m128d(r, e);
27161	}
27162
27163	#[simd_test(enable = "avx512fp16")]
27164	unsafe fn test_mm_mask_cvtsh_sd() {
27165	let src = _mm_setr_pd(`3.0`, `11.0`);
27166	let a = _mm_setr_pd(`2.0`, `20.0`);
27167	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27168	let r = _mm_mask_cvtsh_sd(src, `0`, a, b);
27169	let e = _mm_setr_pd(`3.0`, `20.0`);
27170	assert_eq_m128d(r, e);
27171	let r = _mm_mask_cvtsh_sd(src, `1`, a, b);
27172	let e = _mm_setr_pd(`1.0`, `20.0`);
27173	assert_eq_m128d(r, e);
27174	}
27175
27176	#[simd_test(enable = "avx512fp16")]
27177	unsafe fn test_mm_maskz_cvtsh_sd() {
27178	let a = _mm_setr_pd(`2.0`, `20.0`);
27179	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27180	let r = _mm_maskz_cvtsh_sd(`0`, a, b);
27181	let e = _mm_setr_pd(`0.0`, `20.0`);
27182	assert_eq_m128d(r, e);
27183	let r = _mm_maskz_cvtsh_sd(`1`, a, b);
27184	let e = _mm_setr_pd(`1.0`, `20.0`);
27185	assert_eq_m128d(r, e);
27186	}
27187
27188	#[simd_test(enable = "avx512fp16")]
27189	unsafe fn test_mm_cvt_roundsh_sd() {
27190	let a = _mm_setr_pd(`2.0`, `20.0`);
27191	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27192	let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b);
27193	let e = _mm_setr_pd(`1.0`, `20.0`);
27194	assert_eq_m128d(r, e);
27195	}
27196
27197	#[simd_test(enable = "avx512fp16")]
27198	unsafe fn test_mm_mask_cvt_roundsh_sd() {
27199	let src = _mm_setr_pd(`3.0`, `11.0`);
27200	let a = _mm_setr_pd(`2.0`, `20.0`);
27201	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27202	let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, `0`, a, b);
27203	let e = _mm_setr_pd(`3.0`, `20.0`);
27204	assert_eq_m128d(r, e);
27205	let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, `1`, a, b);
27206	let e = _mm_setr_pd(`1.0`, `20.0`);
27207	assert_eq_m128d(r, e);
27208	}
27209
27210	#[simd_test(enable = "avx512fp16")]
27211	unsafe fn test_mm_maskz_cvt_roundsh_sd() {
27212	let a = _mm_setr_pd(`2.0`, `20.0`);
27213	let b = _mm_setr_ph(`1.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`);
27214	let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(`0`, a, b);
27215	let e = _mm_setr_pd(`0.0`, `20.0`);
27216	assert_eq_m128d(r, e);
27217	let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(`1`, a, b);
27218	let e = _mm_setr_pd(`1.0`, `20.0`);
27219	assert_eq_m128d(r, e);
27220	}
27221
27222	#[simd_test(enable = "avx512fp16")]
27223	unsafe fn test_mm_cvtsh_h() {
27224	let a = _mm_setr_ph(`1.0`, `2.0`, `3.0`, `42.0`, `5.0`, `6.0`, `7.0`, `8.0`);
27225	let r = _mm_cvtsh_h(a);
27226	assert_eq!(r, `1.0`);
27227	}
27228
27229	#[simd_test(enable = "avx512fp16")]
27230	unsafe fn test_mm256_cvtsh_h() {
27231	let a = _mm256_setr_ph(
27232	`1.0`, `2.0`, `3.0`, `42.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
27233	);
27234	let r = _mm256_cvtsh_h(a);
27235	assert_eq!(r, `1.0`);
27236	}
27237
27238	#[simd_test(enable = "avx512fp16")]
27239	unsafe fn test_mm512_cvtsh_h() {
27240	let a = _mm512_setr_ph(
27241	`1.0`, `2.0`, `3.0`, `42.0`, `5.0`, `6.0`, `7.0`, `8.0`, `9.0`, `10.0`, `11.0`, `12.0`, `13.0`, `14.0`, `15.0`, `16.0`,
27242	`17.0`, `18.0`, `19.0`, `20.0`, `21.0`, `22.0`, `23.0`, `24.0`, `25.0`, `26.0`, `27.0`, `28.0`, `29.0`, `30.0`,
27243	`31.0`, `32.0`,
27244	);
27245	let r = _mm512_cvtsh_h(a);
27246	assert_eq!(r, `1.0`);
27247	}
27248
27249	#[simd_test(enable = "avx512fp16")]
27250	unsafe fn test_mm_cvtsi128_si16() {
27251	let a = _mm_setr_epi16(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`);
27252	let r = _mm_cvtsi128_si16(a);
27253	assert_eq!(r, `1`);
27254	}
27255
27256	#[simd_test(enable = "avx512fp16")]
27257	unsafe fn test_mm_cvtsi16_si128() {
27258	let a = `1`;
27259	let r = _mm_cvtsi16_si128(a);
27260	let e = _mm_setr_epi16(`1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`);
27261	assert_eq_m128i(r, e);
27262	}
27263	}
27264