use crate::arch::asm;
use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::{fmaf16, simd::*};
use crate::ptr;

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
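///
/// # Examples
///
/// An illustrative sketch (not a tested doctest); it assumes a nightly toolchain with the
/// `stdarch_x86_avx512_f16` and `f16` features and a CPU supporting AVX512-FP16:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512_f16, f16)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() {
///     // The first argument (e7) is the highest lane; the last (e0) is lane 0.
///     let v = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
///     let mut out: [f16; 8] = [0.0; 8];
///     unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
///     // out is now [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
/// }
/// ```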
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set_ph(
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_set_ph(
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_set_ph(
    e31: f16,
    e30: f16,
    e29: f16,
    e28: f16,
    e27: f16,
    e26: f16,
    e25: f16,
    e24: f16,
    e23: f16,
    e22: f16,
    e21: f16,
    e20: f16,
    e19: f16,
    e18: f16,
    e17: f16,
    e16: f16,
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Copy half-precision (16-bit) floating-point element a to the lower element of dst, and zero
/// the upper 7 elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set_sh(a: f16) -> __m128h {
    __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set1_ph(a: f16) -> __m128h {
    unsafe { transmute(f16x8::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_set1_ph(a: f16) -> __m256h {
    unsafe { transmute(f16x16::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_set1_ph(a: f16) -> __m512h {
    unsafe { transmute(f16x32::splat(a)) }
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
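///
/// # Examples
///
/// An illustrative sketch (not a tested doctest), assuming the same nightly features and
/// AVX512-FP16 hardware as the example above:
///
/// ```ignore
/// // Unlike _mm_set_ph, the first argument of _mm_setr_ph is lane 0.
/// let v = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
/// let mut out: [f16; 8] = [0.0; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
/// // out is now [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
/// ```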
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
    e16: f16,
    e17: f16,
    e18: f16,
    e19: f16,
    e20: f16,
    e21: f16,
    e22: f16,
    e23: f16,
    e24: f16,
    e25: f16,
    e26: f16,
    e27: f16,
    e28: f16,
    e29: f16,
    e30: f16,
    e31: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Return vector of type __m128h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_setzero_ph() -> __m128h {
    unsafe { transmute(f16x8::ZERO) }
}

/// Return vector of type __m256h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_setzero_ph() -> __m256h {
    unsafe { transmute(f16x16::ZERO) }
}

/// Return vector of type __m512h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_setzero_ph() -> __m512h {
    unsafe { transmute(f16x32::ZERO) }
}

/// Return vector of type `__m128h` with undefined elements. In practice, this returns the all-zero
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_undefined_ph() -> __m128h {
    unsafe { transmute(f16x8::ZERO) }
}

/// Return vector of type `__m256h` with undefined elements. In practice, this returns the all-zero
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_undefined_ph() -> __m256h {
    unsafe { transmute(f16x16::ZERO) }
}

/// Return vector of type `__m512h` with undefined elements. In practice, this returns the all-zero
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_undefined_ph() -> __m512h {
    unsafe { transmute(f16x32::ZERO) }
}

/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_pd(a: __m128h) -> __m128d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castps_ph(a: __m128) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castps_ph(a: __m256) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castps_ph(a: __m512) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_ps(a: __m128h) -> __m128 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
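///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples; `_mm_extract_epi16` is the plain SSE2 intrinsic:
///
/// ```ignore
/// let ones = _mm_set1_ph(1.0);
/// let bits = _mm_castph_si128(ones);
/// // 1.0 in IEEE binary16 is 0x3C00, so every 16-bit lane holds that bit pattern.
/// assert_eq!(_mm_extract_epi16::<0>(bits) as u16, 0x3C00);
/// ```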
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_si128(a: __m128h) -> __m128i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
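///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples:
///
/// ```ignore
/// let lo = _mm_set1_ph(2.0);
/// let wide = _mm256_zextph128_ph256(lo);
/// // Lanes 0..8 of `wide` hold 2.0; lanes 8..16 are guaranteed to be zero,
/// // unlike _mm256_castph128_ph256, which leaves them undefined.
/// ```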
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
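///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples; `_CMP_EQ_OQ` is the usual AVX comparison predicate constant:
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_setr_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
/// let k = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
/// // Bit i of k is set where a[i] == b[i].
/// assert_eq!(k, 0b0101_0101);
/// ```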
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, k1, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask16,
    a: __m256h,
    b: __m256h,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, zmm_reg, a, b)
        }
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                k1 = in(kreg) k1,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
        }
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    static_assert_sae!(SAE);
    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcmpsh(a, b, IMM5, k1, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcomish(a, b, IMM5, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
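///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples:
///
/// ```ignore
/// let a = _mm_set_sh(1.5);
/// assert_eq!(_mm_comieq_sh(a, _mm_set_sh(1.5)), 1);
/// assert_eq!(_mm_comieq_sh(a, _mm_set_sh(2.0)), 0);
/// ```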
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
    *mem_addr.cast()
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
    *mem_addr.cast()
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
    *mem_addr.cast()
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
/// and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
    _mm_set_sh(*mem_addr)
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h = src;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}"),
        dst = inout(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}{{z}}"),
        dst = out(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
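///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples:
///
/// ```ignore
/// let data: [f16; 8] = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0];
/// // No alignment requirement, unlike _mm_load_ph.
/// let v = unsafe { _mm_loadu_ph(data.as_ptr()) };
/// // Lane 0 of `v` is data[0] (0.5) and lane 7 is data[7] (4.0).
/// ```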
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
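///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples:
///
/// ```ignore
/// let src = _mm_set_sh(9.0);
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(5.0);
/// // Mask bit 0 is clear, so lane 0 is taken from src (9.0); lanes 1..8 come from a.
/// let r = _mm_mask_move_sh(src, 0b0, a, b);
/// // With a mask of 0b1, lane 0 would instead be 5.0, copied from b.
/// ```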
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = simd_extract!(src, 0);
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
/// elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = 0.;
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mov: f16 = simd_extract!(b, 0);
        simd_insert!(a, 0, mov)
    }
}

/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
    *mem_addr.cast() = a;
}

/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
    *mem_addr.cast() = a;
}

/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
    *mem_addr.cast() = a;
}

/// Store the lower half-precision (16-bit) floating-point element from a into memory.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
    *mem_addr = simd_extract!(a, 0);
}

/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
    asm!(
        vps!("vmovdqu16", "{{{k}}}, {src}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        src = in(xmm_reg) a,
        options(nostack, preserves_flags)
    );
}

/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
    ptr::write_unaligned(mem_addr.cast(), a);
}

/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
    ptr::write_unaligned(mem_addr.cast(), a);
}

/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
    ptr::write_unaligned(mem_addr.cast(), a);
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_add(a, b) }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
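///
/// # Examples
///
/// An illustrative sketch (not a tested doctest) under the same nightly-feature and hardware
/// assumptions as the earlier examples:
///
/// ```ignore
/// let src = _mm_set1_ph(-1.0);
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // Lanes whose mask bit is set receive a + b; the rest keep the value from src.
/// let r = _mm_mask_add_ph(src, 0b0000_1111, a, b);
/// // Lanes 0..4 of `r` are 3.0 and lanes 4..8 are -1.0.
/// ```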
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let r: __m128h = _mm_add_ph(a, b);
        simd_select_bitmask(k, r, src)
    }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let r: __m128h = _mm_add_ph(a, b);
        simd_select_bitmask(k, r, _mm_setzero_ph())
    }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_add(a, b) }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe {
        let r: __m256h = _mm256_add_ph(a, b);
        simd_select_bitmask(k, r, src)
    }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe {
        let r: __m256h = _mm256_add_ph(a, b);
        simd_select_bitmask(k, r, _mm256_setzero_ph())
    }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_add(a, b) }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        let r: __m512h = _mm512_add_ph(a, b);
        simd_select_bitmask(k, r, src)
    }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1440///
1441/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
1442#[inline]
1443#[target_feature(enable = "avx512fp16")]
1444#[cfg_attr(test, assert_instr(vaddph))]
1445#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1446pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1447 unsafe {
1448 let r: __m512h = _mm512_add_ph(a, b);
1449 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1450 }
1451}
1452
1453/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1454/// Rounding is done according to the rounding parameter, which can be one of:
1455///
1456/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1457/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1458/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1459/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1460/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1461///
1462/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
1463#[inline]
1464#[target_feature(enable = "avx512fp16")]
1465#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1466#[rustc_legacy_const_generics(2)]
1467#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1468pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1469 unsafe {
1470 static_assert_rounding!(ROUNDING);
1471 vaddph(a, b, ROUNDING)
1472 }
1473}
1474
1475/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1476/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1477/// Rounding is done according to the rounding parameter, which can be one of:
1478///
1479/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1480/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1481/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1482/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1484///
1485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
1486#[inline]
1487#[target_feature(enable = "avx512fp16")]
1488#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1489#[rustc_legacy_const_generics(4)]
1490#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1491pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
1492 src: __m512h,
1493 k: __mmask32,
1494 a: __m512h,
1495 b: __m512h,
1496) -> __m512h {
1497 unsafe {
1498 static_assert_rounding!(ROUNDING);
1499 let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
1500 simd_select_bitmask(m:k, yes:r, no:src)
1501 }
1502}
1503
1504/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1505/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1506/// Rounding is done according to the rounding parameter, which can be one of:
1507///
1508/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1509/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1510/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1511/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1512///
1513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
1514#[inline]
1515#[target_feature(enable = "avx512fp16")]
1516#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1517#[rustc_legacy_const_generics(3)]
1518#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1519pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
1520 k: __mmask32,
1521 a: __m512h,
1522 b: __m512h,
1523) -> __m512h {
1524 unsafe {
1525 static_assert_rounding!(ROUNDING);
1526 let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
1527 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1528 }
1529}
1530
1531/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1532/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1533/// Rounding is done according to the rounding parameter, which can be one of:
1534///
1535/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1536/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1537/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1538/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1539/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1540///
1541/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
1542#[inline]
1543#[target_feature(enable = "avx512fp16")]
1544#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1545#[rustc_legacy_const_generics(2)]
1546#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1547pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1548 static_assert_rounding!(ROUNDING);
1549 _mm_mask_add_round_sh::<ROUNDING>(src:_mm_undefined_ph(), k:0xff, a, b)
1550}
1551
1552/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1553/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1554/// writemask k (the element is copied from src when mask bit 0 is not set).
1555/// Rounding is done according to the rounding parameter, which can be one of:
1556///
1557/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1558/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1559/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1560/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1561/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1562///
1563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
1564#[inline]
1565#[target_feature(enable = "avx512fp16")]
1566#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1567#[rustc_legacy_const_generics(4)]
1568#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1569pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
1570 src: __m128h,
1571 k: __mmask8,
1572 a: __m128h,
1573 b: __m128h,
1574) -> __m128h {
1575 unsafe {
1576 static_assert_rounding!(ROUNDING);
1577 vaddsh(a, b, src, k, ROUNDING)
1578 }
1579}
1580
1581/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1582/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1583/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1584/// Rounding is done according to the rounding parameter, which can be one of:
1585///
1586/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1587/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1588/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1589/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1590/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1591///
1592/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
1593#[inline]
1594#[target_feature(enable = "avx512fp16")]
1595#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1596#[rustc_legacy_const_generics(3)]
1597#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1598pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1599 static_assert_rounding!(ROUNDING);
1600 _mm_mask_add_round_sh::<ROUNDING>(src:_mm_setzero_ph(), k, a, b)
1601}
1602
1603/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1604/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1605///
1606/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
1607#[inline]
1608#[target_feature(enable = "avx512fp16")]
1609#[cfg_attr(test, assert_instr(vaddsh))]
1610#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1611pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
1612 _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
1613}
1614
1615/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1616/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1617/// writemask k (the element is copied from src when mask bit 0 is not set).
1618///
1619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
1620#[inline]
1621#[target_feature(enable = "avx512fp16")]
1622#[cfg_attr(test, assert_instr(vaddsh))]
1623#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1624pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1625 _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
1626}
1627
1628/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1629/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1630/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1631///
1632/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
1633#[inline]
1634#[target_feature(enable = "avx512fp16")]
1635#[cfg_attr(test, assert_instr(vaddsh))]
1636#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1637pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1638 _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
1639}
1640
1641/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1642///
1643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
1644#[inline]
1645#[target_feature(enable = "avx512fp16,avx512vl")]
1646#[cfg_attr(test, assert_instr(vsubph))]
1647#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1648pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
1649 unsafe { simd_sub(lhs:a, rhs:b) }
1650}
1651
1652/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1653/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1654///
1655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
1656#[inline]
1657#[target_feature(enable = "avx512fp16,avx512vl")]
1658#[cfg_attr(test, assert_instr(vsubph))]
1659#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1660pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1661 unsafe {
1662 let r: __m128h = _mm_sub_ph(a, b);
1663 simd_select_bitmask(m:k, yes:r, no:src)
1664 }
1665}
1666
1667/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1668/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
1671#[inline]
1672#[target_feature(enable = "avx512fp16,avx512vl")]
1673#[cfg_attr(test, assert_instr(vsubph))]
1674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1675pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1676 unsafe {
1677 let r: __m128h = _mm_sub_ph(a, b);
1678 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1679 }
1680}
1681
1682/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1683///
1684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
1685#[inline]
1686#[target_feature(enable = "avx512fp16,avx512vl")]
1687#[cfg_attr(test, assert_instr(vsubph))]
1688#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1689pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
1690 unsafe { simd_sub(lhs:a, rhs:b) }
1691}
1692
1693/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1694/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1695///
1696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
1697#[inline]
1698#[target_feature(enable = "avx512fp16,avx512vl")]
1699#[cfg_attr(test, assert_instr(vsubph))]
1700#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1701pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1702 unsafe {
1703 let r: __m256h = _mm256_sub_ph(a, b);
1704 simd_select_bitmask(m:k, yes:r, no:src)
1705 }
1706}
1707
1708/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1709/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1710///
1711/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
1712#[inline]
1713#[target_feature(enable = "avx512fp16,avx512vl")]
1714#[cfg_attr(test, assert_instr(vsubph))]
1715#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1716pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1717 unsafe {
1718 let r: __m256h = _mm256_sub_ph(a, b);
1719 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
1720 }
1721}
1722
1723/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1724///
1725/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
1726#[inline]
1727#[target_feature(enable = "avx512fp16")]
1728#[cfg_attr(test, assert_instr(vsubph))]
1729#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1730pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
1731 unsafe { simd_sub(lhs:a, rhs:b) }
1732}
1733
1734/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1735/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1736///
1737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
1738#[inline]
1739#[target_feature(enable = "avx512fp16")]
1740#[cfg_attr(test, assert_instr(vsubph))]
1741#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1742pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1743 unsafe {
1744 let r: __m512h = _mm512_sub_ph(a, b);
1745 simd_select_bitmask(m:k, yes:r, no:src)
1746 }
1747}
1748
1749/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1750/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1751///
1752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
1753#[inline]
1754#[target_feature(enable = "avx512fp16")]
1755#[cfg_attr(test, assert_instr(vsubph))]
1756#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1757pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1758 unsafe {
1759 let r: __m512h = _mm512_sub_ph(a, b);
1760 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1761 }
1762}
1763
1764/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1765/// Rounding is done according to the rounding parameter, which can be one of:
1766///
1767/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1768/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1769/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1770/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1771/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1772///
1773/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
1774#[inline]
1775#[target_feature(enable = "avx512fp16")]
1776#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1777#[rustc_legacy_const_generics(2)]
1778#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1779pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1780 unsafe {
1781 static_assert_rounding!(ROUNDING);
1782 vsubph(a, b, ROUNDING)
1783 }
1784}
1785
1786/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1787/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1788/// Rounding is done according to the rounding parameter, which can be one of:
1789///
1790/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1791/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1792/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1793/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1794/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1795///
1796/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
1797#[inline]
1798#[target_feature(enable = "avx512fp16")]
1799#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1800#[rustc_legacy_const_generics(4)]
1801#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1802pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
1803 src: __m512h,
1804 k: __mmask32,
1805 a: __m512h,
1806 b: __m512h,
1807) -> __m512h {
1808 unsafe {
1809 static_assert_rounding!(ROUNDING);
1810 let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
1811 simd_select_bitmask(m:k, yes:r, no:src)
1812 }
1813}
1814
1815/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1816/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1817/// Rounding is done according to the rounding parameter, which can be one of:
1818///
1819/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1820/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1821/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1822/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1823/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1824///
1825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
1826#[inline]
1827#[target_feature(enable = "avx512fp16")]
1828#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1829#[rustc_legacy_const_generics(3)]
1830#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1831pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
1832 k: __mmask32,
1833 a: __m512h,
1834 b: __m512h,
1835) -> __m512h {
1836 unsafe {
1837 static_assert_rounding!(ROUNDING);
1838 let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
1839 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1840 }
1841}
1842
1843/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1844/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1845/// Rounding is done according to the rounding parameter, which can be one of:
1846///
1847/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1848/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1849/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1850/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1851/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1852///
1853/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
1854#[inline]
1855#[target_feature(enable = "avx512fp16")]
1856#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1857#[rustc_legacy_const_generics(2)]
1858#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1859pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1860 static_assert_rounding!(ROUNDING);
1861 _mm_mask_sub_round_sh::<ROUNDING>(src:_mm_undefined_ph(), k:0xff, a, b)
1862}
1863
1864/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1865/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1866/// writemask k (the element is copied from src when mask bit 0 is not set).
1867/// Rounding is done according to the rounding parameter, which can be one of:
1868///
1869/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1870/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1871/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1872/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1873/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1874///
1875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
1876#[inline]
1877#[target_feature(enable = "avx512fp16")]
1878#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1879#[rustc_legacy_const_generics(4)]
1880#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1881pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
1882 src: __m128h,
1883 k: __mmask8,
1884 a: __m128h,
1885 b: __m128h,
1886) -> __m128h {
1887 unsafe {
1888 static_assert_rounding!(ROUNDING);
1889 vsubsh(a, b, src, k, ROUNDING)
1890 }
1891}
1892
1893/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1894/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1895/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1896/// Rounding is done according to the rounding parameter, which can be one of:
1897///
1898/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1899/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1900/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1901/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1902/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1903///
1904/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
1905#[inline]
1906#[target_feature(enable = "avx512fp16")]
1907#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1908#[rustc_legacy_const_generics(3)]
1909#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1910pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1911 static_assert_rounding!(ROUNDING);
1912 _mm_mask_sub_round_sh::<ROUNDING>(src:_mm_setzero_ph(), k, a, b)
1913}
1914
1915/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1916/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1917///
1918/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
1919#[inline]
1920#[target_feature(enable = "avx512fp16")]
1921#[cfg_attr(test, assert_instr(vsubsh))]
1922#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1923pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
1924 _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
1925}
1926
1927/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1928/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1929/// writemask k (the element is copied from src when mask bit 0 is not set).
1930///
1931/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
1932#[inline]
1933#[target_feature(enable = "avx512fp16")]
1934#[cfg_attr(test, assert_instr(vsubsh))]
1935#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1936pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1937 _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
1938}
1939
1940/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1941/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1942/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1943///
1944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
1945#[inline]
1946#[target_feature(enable = "avx512fp16")]
1947#[cfg_attr(test, assert_instr(vsubsh))]
1948#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1949pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1950 _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
1951}
1952
1953/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1954///
1955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
1956#[inline]
1957#[target_feature(enable = "avx512fp16,avx512vl")]
1958#[cfg_attr(test, assert_instr(vmulph))]
1959#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1960pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
1961 unsafe { simd_mul(x:a, y:b) }
1962}
1963
1964/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1965/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1966///
1967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
1968#[inline]
1969#[target_feature(enable = "avx512fp16,avx512vl")]
1970#[cfg_attr(test, assert_instr(vmulph))]
1971#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1972pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1973 unsafe {
1974 let r: __m128h = _mm_mul_ph(a, b);
1975 simd_select_bitmask(m:k, yes:r, no:src)
1976 }
1977}
1978
1979/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1980/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1981///
1982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
1983#[inline]
1984#[target_feature(enable = "avx512fp16,avx512vl")]
1985#[cfg_attr(test, assert_instr(vmulph))]
1986#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1987pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1988 unsafe {
1989 let r: __m128h = _mm_mul_ph(a, b);
1990 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1991 }
1992}
1993
1994/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1995///
1996/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
1997#[inline]
1998#[target_feature(enable = "avx512fp16,avx512vl")]
1999#[cfg_attr(test, assert_instr(vmulph))]
2000#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2001pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
2002 unsafe { simd_mul(x:a, y:b) }
2003}
2004
2005/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2006/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2007///
2008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
2009#[inline]
2010#[target_feature(enable = "avx512fp16,avx512vl")]
2011#[cfg_attr(test, assert_instr(vmulph))]
2012#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2013pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2014 unsafe {
2015 let r: __m256h = _mm256_mul_ph(a, b);
2016 simd_select_bitmask(m:k, yes:r, no:src)
2017 }
2018}
2019
2020/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2021/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2022///
2023/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
2024#[inline]
2025#[target_feature(enable = "avx512fp16,avx512vl")]
2026#[cfg_attr(test, assert_instr(vmulph))]
2027#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2028pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2029 unsafe {
2030 let r: __m256h = _mm256_mul_ph(a, b);
2031 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
2032 }
2033}
2034
2035/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2036///
2037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
2038#[inline]
2039#[target_feature(enable = "avx512fp16")]
2040#[cfg_attr(test, assert_instr(vmulph))]
2041#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2042pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
2043 unsafe { simd_mul(x:a, y:b) }
2044}
2045
2046/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2047/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2048///
2049/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
2050#[inline]
2051#[target_feature(enable = "avx512fp16")]
2052#[cfg_attr(test, assert_instr(vmulph))]
2053#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2054pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2055 unsafe {
2056 let r: __m512h = _mm512_mul_ph(a, b);
2057 simd_select_bitmask(m:k, yes:r, no:src)
2058 }
2059}
2060
2061/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2062/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2063///
2064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
2065#[inline]
2066#[target_feature(enable = "avx512fp16")]
2067#[cfg_attr(test, assert_instr(vmulph))]
2068#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2069pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2070 unsafe {
2071 let r: __m512h = _mm512_mul_ph(a, b);
2072 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2073 }
2074}
2075
2076/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2077/// Rounding is done according to the rounding parameter, which can be one of:
2078///
2079/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2080/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2081/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2082/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2083/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2084///
2085/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
2086#[inline]
2087#[target_feature(enable = "avx512fp16")]
2088#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2089#[rustc_legacy_const_generics(2)]
2090#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2091pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2092 unsafe {
2093 static_assert_rounding!(ROUNDING);
2094 vmulph(a, b, ROUNDING)
2095 }
2096}
2097
2098/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2099/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2100/// Rounding is done according to the rounding parameter, which can be one of:
2101///
2102/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2103/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2104/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2105/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2106/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2107///
2108/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
2109#[inline]
2110#[target_feature(enable = "avx512fp16")]
2111#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2112#[rustc_legacy_const_generics(4)]
2113#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2114pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
2115 src: __m512h,
2116 k: __mmask32,
2117 a: __m512h,
2118 b: __m512h,
2119) -> __m512h {
2120 unsafe {
2121 static_assert_rounding!(ROUNDING);
2122 let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
2123 simd_select_bitmask(m:k, yes:r, no:src)
2124 }
2125}
2126
2127/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2128/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2129/// Rounding is done according to the rounding parameter, which can be one of:
2130///
2131/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2132/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2133/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2134/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2135/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2136///
2137/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
2138#[inline]
2139#[target_feature(enable = "avx512fp16")]
2140#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2141#[rustc_legacy_const_generics(3)]
2142#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2143pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
2144 k: __mmask32,
2145 a: __m512h,
2146 b: __m512h,
2147) -> __m512h {
2148 unsafe {
2149 static_assert_rounding!(ROUNDING);
2150 let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
2151 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2152 }
2153}
2154
2155/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2156/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2157/// Rounding is done according to the rounding parameter, which can be one of:
2158///
2159/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2160/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2161/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2162/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2163/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2164///
2165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
2166#[inline]
2167#[target_feature(enable = "avx512fp16")]
2168#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2169#[rustc_legacy_const_generics(2)]
2170#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2171pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2172 static_assert_rounding!(ROUNDING);
2173 _mm_mask_mul_round_sh::<ROUNDING>(src:_mm_undefined_ph(), k:0xff, a, b)
2174}
2175
2176/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2177/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2178/// writemask k (the element is copied from src when mask bit 0 is not set).
2179/// Rounding is done according to the rounding parameter, which can be one of:
2180///
2181/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2182/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2183/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2184/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2185/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2186///
2187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
2188#[inline]
2189#[target_feature(enable = "avx512fp16")]
2190#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2191#[rustc_legacy_const_generics(4)]
2192#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2193pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
2194 src: __m128h,
2195 k: __mmask8,
2196 a: __m128h,
2197 b: __m128h,
2198) -> __m128h {
2199 unsafe {
2200 static_assert_rounding!(ROUNDING);
2201 vmulsh(a, b, src, k, ROUNDING)
2202 }
2203}
2204
2205/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2206/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2207/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2208/// Rounding is done according to the rounding parameter, which can be one of:
2209///
2210/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2211/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2212/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2213/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2214/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2215///
2216/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
2217#[inline]
2218#[target_feature(enable = "avx512fp16")]
2219#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2220#[rustc_legacy_const_generics(3)]
2221#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2222pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2223 static_assert_rounding!(ROUNDING);
2224 _mm_mask_mul_round_sh::<ROUNDING>(src:_mm_setzero_ph(), k, a, b)
2225}
2226
2227/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2228/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2229///
2230/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
2231#[inline]
2232#[target_feature(enable = "avx512fp16")]
2233#[cfg_attr(test, assert_instr(vmulsh))]
2234#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2235pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
2236 _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
2237}
2238
2239/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2240/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2241/// writemask k (the element is copied from src when mask bit 0 is not set).
2242///
2243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
2244#[inline]
2245#[target_feature(enable = "avx512fp16")]
2246#[cfg_attr(test, assert_instr(vmulsh))]
2247#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2248pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2249 _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2250}
2251
2252/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2253/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2254/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2255///
2256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
2257#[inline]
2258#[target_feature(enable = "avx512fp16")]
2259#[cfg_attr(test, assert_instr(vmulsh))]
2260#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2261pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2262 _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
2263}
2264
2265/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2266///
2267/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
2268#[inline]
2269#[target_feature(enable = "avx512fp16,avx512vl")]
2270#[cfg_attr(test, assert_instr(vdivph))]
2271#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2272pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
2273 unsafe { simd_div(lhs:a, rhs:b) }
2274}
2275
2276/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2277/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2278///
2279/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
2280#[inline]
2281#[target_feature(enable = "avx512fp16,avx512vl")]
2282#[cfg_attr(test, assert_instr(vdivph))]
2283#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2284pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2285 unsafe {
2286 let r: __m128h = _mm_div_ph(a, b);
2287 simd_select_bitmask(m:k, yes:r, no:src)
2288 }
2289}
2290
2291/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2292/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2293///
2294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
2295#[inline]
2296#[target_feature(enable = "avx512fp16,avx512vl")]
2297#[cfg_attr(test, assert_instr(vdivph))]
2298#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2299pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2300 unsafe {
2301 let r: __m128h = _mm_div_ph(a, b);
2302 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
2303 }
2304}
2305
2306/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2307///
2308/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
2309#[inline]
2310#[target_feature(enable = "avx512fp16,avx512vl")]
2311#[cfg_attr(test, assert_instr(vdivph))]
2312#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2313pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
2314 unsafe { simd_div(lhs:a, rhs:b) }
2315}
2316
2317/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2318/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2319///
2320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
2321#[inline]
2322#[target_feature(enable = "avx512fp16,avx512vl")]
2323#[cfg_attr(test, assert_instr(vdivph))]
2324#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2325pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2326 unsafe {
2327 let r: __m256h = _mm256_div_ph(a, b);
2328 simd_select_bitmask(m:k, yes:r, no:src)
2329 }
2330}
2331
2332/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2333/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2334///
2335/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
2336#[inline]
2337#[target_feature(enable = "avx512fp16,avx512vl")]
2338#[cfg_attr(test, assert_instr(vdivph))]
2339#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2340pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2341 unsafe {
2342 let r: __m256h = _mm256_div_ph(a, b);
2343 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
2344 }
2345}
2346
2347/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2348///
2349/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
2350#[inline]
2351#[target_feature(enable = "avx512fp16")]
2352#[cfg_attr(test, assert_instr(vdivph))]
2353#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2354pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
2355 unsafe { simd_div(lhs:a, rhs:b) }
2356}
2357
2358/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2359/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
2362#[inline]
2363#[target_feature(enable = "avx512fp16")]
2364#[cfg_attr(test, assert_instr(vdivph))]
2365#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2366pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2367 unsafe {
2368 let r: __m512h = _mm512_div_ph(a, b);
2369 simd_select_bitmask(m:k, yes:r, no:src)
2370 }
2371}
2372
2373/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2374/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2375///
2376/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
2377#[inline]
2378#[target_feature(enable = "avx512fp16")]
2379#[cfg_attr(test, assert_instr(vdivph))]
2380#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2381pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2382 unsafe {
2383 let r: __m512h = _mm512_div_ph(a, b);
2384 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2385 }
2386}
2387
2388/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2389/// Rounding is done according to the rounding parameter, which can be one of:
2390///
2391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2396///
2397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
2398#[inline]
2399#[target_feature(enable = "avx512fp16")]
2400#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2401#[rustc_legacy_const_generics(2)]
2402#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2403pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2404 unsafe {
2405 static_assert_rounding!(ROUNDING);
2406 vdivph(a, b, ROUNDING)
2407 }
2408}
2409
2410/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2411/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2412/// Rounding is done according to the rounding parameter, which can be one of:
2413///
2414/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2415/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2416/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2417/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2418/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2419///
2420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
2421#[inline]
2422#[target_feature(enable = "avx512fp16")]
2423#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2424#[rustc_legacy_const_generics(4)]
2425#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2426pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
2427 src: __m512h,
2428 k: __mmask32,
2429 a: __m512h,
2430 b: __m512h,
2431) -> __m512h {
2432 unsafe {
2433 static_assert_rounding!(ROUNDING);
2434 let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
2435 simd_select_bitmask(m:k, yes:r, no:src)
2436 }
2437}
2438
2439/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2440/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441/// Rounding is done according to the rounding parameter, which can be one of:
2442///
2443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2448///
2449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
2450#[inline]
2451#[target_feature(enable = "avx512fp16")]
2452#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2453#[rustc_legacy_const_generics(3)]
2454#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
    k: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
    }
}
2466
2467/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2468/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2469/// Rounding is done according to the rounding parameter, which can be one of:
2470///
2471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2476///
2477/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
2478#[inline]
2479#[target_feature(enable = "avx512fp16")]
2480#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2481#[rustc_legacy_const_generics(2)]
2482#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_div_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
}
2487
2488/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2489/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2490/// writemask k (the element is copied from src when mask bit 0 is not set).
2491/// Rounding is done according to the rounding parameter, which can be one of:
2492///
2493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2498///
2499/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
2500#[inline]
2501#[target_feature(enable = "avx512fp16")]
2502#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2503#[rustc_legacy_const_generics(4)]
2504#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vdivsh(a, b, src, k, ROUNDING)
    }
}
2516
2517/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2518/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2519/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2520/// Rounding is done according to the rounding parameter, which can be one of:
2521///
2522/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2523/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2524/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2525/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2526/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2527///
2528/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
2529#[inline]
2530#[target_feature(enable = "avx512fp16")]
2531#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2532#[rustc_legacy_const_generics(3)]
2533#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_div_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
}
2538
2539/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2540/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2541///
2542/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
2543#[inline]
2544#[target_feature(enable = "avx512fp16")]
2545#[cfg_attr(test, assert_instr(vdivsh))]
2546#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
}
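
// Illustrative only (not part of this module): the scalar `sh` pattern operates on lane 0
// and passes lanes 1..=7 through unchanged from `a`. Sketched with f32 lanes instead of f16:
#[allow(dead_code)]
fn div_sh_model(a: [f32; 8], b: [f32; 8]) -> [f32; 8] {
    let mut out = a; // the upper 7 elements are copied from `a`
    out[0] = a[0] / b[0]; // only the lowest element is divided
    out
}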
2550
2551/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2552/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2553/// writemask k (the element is copied from src when mask bit 0 is not set).
2554///
2555/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
2556#[inline]
2557#[target_feature(enable = "avx512fp16")]
2558#[cfg_attr(test, assert_instr(vdivsh))]
2559#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
2563
2564/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2565/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2566/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2567///
2568/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
2569#[inline]
2570#[target_feature(enable = "avx512fp16")]
2571#[cfg_attr(test, assert_instr(vdivsh))]
2572#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
}
2576
2577/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2578/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2579/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2580///
2581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
2582#[inline]
2583#[target_feature(enable = "avx512fp16,avx512vl")]
2584#[cfg_attr(test, assert_instr(vfmulcph))]
2585#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
}
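
// Illustrative reference model (not part of this module): the product computed for each
// (real, imaginary) pair of adjacent lanes, written with f32 scalars for clarity. For
// a = re_a + i*im_a and b = re_b + i*im_b the result is
// (re_a*re_b - im_a*im_b) + i*(re_a*im_b + im_a*re_b).
#[allow(dead_code)]
fn complex_mul_model(a: (f32, f32), b: (f32, f32)) -> (f32, f32) {
    (a.0 * b.0 - a.1 * b.1, a.0 * b.1 + a.1 * b.0)
}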
2589
2590/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2591/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2592/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2593///
2594/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
2595#[inline]
2596#[target_feature(enable = "avx512fp16,avx512vl")]
2597#[cfg_attr(test, assert_instr(vfmulcph))]
2598#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
}
2602
2603/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2604/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2605/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2606///
2607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
2608#[inline]
2609#[target_feature(enable = "avx512fp16,avx512vl")]
2610#[cfg_attr(test, assert_instr(vfmulcph))]
2611#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
}
2615
2616/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2617/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2618/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2619///
2620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
2621#[inline]
2622#[target_feature(enable = "avx512fp16,avx512vl")]
2623#[cfg_attr(test, assert_instr(vfmulcph))]
2624#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
}
2628
2629/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2630/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2631/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2632///
2633/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
2634#[inline]
2635#[target_feature(enable = "avx512fp16,avx512vl")]
2636#[cfg_attr(test, assert_instr(vfmulcph))]
2637#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
}
2641
2642/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2643/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2644/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2645///
2646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
2647#[inline]
2648#[target_feature(enable = "avx512fp16,avx512vl")]
2649#[cfg_attr(test, assert_instr(vfmulcph))]
2650#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
}
2654
2655/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2656/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2657/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2658///
2659/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
2660#[inline]
2661#[target_feature(enable = "avx512fp16")]
2662#[cfg_attr(test, assert_instr(vfmulcph))]
2663#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
}
2667
2668/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2669/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2670/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2671///
2672/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
2673#[inline]
2674#[target_feature(enable = "avx512fp16")]
2675#[cfg_attr(test, assert_instr(vfmulcph))]
2676#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
2680
2681/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2682/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2683/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2684///
2685/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
2686#[inline]
2687#[target_feature(enable = "avx512fp16")]
2688#[cfg_attr(test, assert_instr(vfmulcph))]
2689#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
}
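
// Note that the 512-bit `pch` operations take a `__mmask16` even though the vector holds
// 32 f16 lanes: each mask bit governs one complex element, i.e. one (real, imaginary) pair
// of adjacent lanes. An illustrative sketch (not part of this module) of expanding such a
// mask to per-lane bits:
#[allow(dead_code)]
fn expand_complex_mask(k: u16) -> u32 {
    let mut lanes = 0u32;
    for i in 0..16 {
        if (k >> i) & 1 == 1 {
            // One complex mask bit covers the two f16 lanes of that element.
            lanes |= 0b11 << (2 * i);
        }
    }
    lanes
}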
2693
2694/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is
2695/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2696/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2697///
2698/// Rounding is done according to the rounding parameter, which can be one of:
2699///
2700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2705///
2706/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
2707#[inline]
2708#[target_feature(enable = "avx512fp16")]
2709#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2710#[rustc_legacy_const_generics(2)]
2711#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
}
2716
2717/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
2718/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2719/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2720///
2721/// Rounding is done according to the rounding parameter, which can be one of:
2722///
2723/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2724/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2725/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2726/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2727/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2728///
2729/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
2730#[inline]
2731#[target_feature(enable = "avx512fp16")]
2732#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2733#[rustc_legacy_const_generics(4)]
2734#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
    src: __m512h,
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
    }
}
2752
2753/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2754/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2755/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2756///
2757/// Rounding is done according to the rounding parameter, which can be one of:
2758///
2759/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2760/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2761/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2762/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2763/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2764///
2765/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
2766#[inline]
2767#[target_feature(enable = "avx512fp16")]
2768#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2769#[rustc_legacy_const_generics(3)]
2770#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
}
2779
2780/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2781/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2782/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2783/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2784///
2785/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
2786#[inline]
2787#[target_feature(enable = "avx512fp16")]
2788#[cfg_attr(test, assert_instr(vfmulcsh))]
2789#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(_mm_undefined_ph(), 0xff, a, b)
}
2793
2794/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2795/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2796/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent
2797/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2798///
2799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
2800#[inline]
2801#[target_feature(enable = "avx512fp16")]
2802#[cfg_attr(test, assert_instr(vfmulcsh))]
2803#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
2807
2808/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2809/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2810/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2811/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2812///
2813/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
2814#[inline]
2815#[target_feature(enable = "avx512fp16")]
2816#[cfg_attr(test, assert_instr(vfmulcsh))]
2817#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(_mm_setzero_ph(), k, a, b)
}
2821
2822/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2823/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2824/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2825/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2826///
2827/// Rounding is done according to the rounding parameter, which can be one of:
2828///
2829/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2830/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2831/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2832/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2833/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2834///
2835/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
2836#[inline]
2837#[target_feature(enable = "avx512fp16")]
2838#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2839#[rustc_legacy_const_generics(2)]
2840#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
}
2845
2846/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2847/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2848/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2849/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2850///
2851/// Rounding is done according to the rounding parameter, which can be one of:
2852///
2853/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2854/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2855/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2856/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2857/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2858///
2859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
2860#[inline]
2861#[target_feature(enable = "avx512fp16")]
2862#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2863#[rustc_legacy_const_generics(4)]
2864#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
    }
}
2882
2883/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2884/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2885/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2886/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2887///
2888/// Rounding is done according to the rounding parameter, which can be one of:
2889///
2890/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2891/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2892/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2893/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2895///
2896/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
2897#[inline]
2898#[target_feature(enable = "avx512fp16")]
2899#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2900#[rustc_legacy_const_generics(3)]
2901#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
}
2910
2911/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2912/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2913/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2914///
2915/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
2916#[inline]
2917#[target_feature(enable = "avx512fp16,avx512vl")]
2918#[cfg_attr(test, assert_instr(vfmulcph))]
2919#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mul_pch(a, b)
}
2923
2924/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2925/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2926/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2927///
2928/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
2929#[inline]
2930#[target_feature(enable = "avx512fp16,avx512vl")]
2931#[cfg_attr(test, assert_instr(vfmulcph))]
2932#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(src, k, a, b)
}
2936
2937/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2938/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2939/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2940///
2941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
2942#[inline]
2943#[target_feature(enable = "avx512fp16,avx512vl")]
2944#[cfg_attr(test, assert_instr(vfmulcph))]
2945#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_maskz_mul_pch(k, a, b)
}
2949
2950/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2951/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2952/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2953///
2954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
2955#[inline]
2956#[target_feature(enable = "avx512fp16,avx512vl")]
2957#[cfg_attr(test, assert_instr(vfmulcph))]
2958#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mul_pch(a, b)
}
2962
2963/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2964/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2965/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2966///
2967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
2968#[inline]
2969#[target_feature(enable = "avx512fp16,avx512vl")]
2970#[cfg_attr(test, assert_instr(vfmulcph))]
2971#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(src, k, a, b)
}
2975
2976/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2977/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2978/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2979///
2980/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
2981#[inline]
2982#[target_feature(enable = "avx512fp16,avx512vl")]
2983#[cfg_attr(test, assert_instr(vfmulcph))]
2984#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_maskz_mul_pch(k, a, b)
}
2988
2989/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
2990/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2991///
2992/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
2993#[inline]
2994#[target_feature(enable = "avx512fp16")]
2995#[cfg_attr(test, assert_instr(vfmulcph))]
2996#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mul_pch(a, b)
}
3000
3001/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3002/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3003/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3004///
3005/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
3006#[inline]
3007#[target_feature(enable = "avx512fp16")]
3008#[cfg_attr(test, assert_instr(vfmulcph))]
3009#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(src, k, a, b)
}
3013
3014/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3015/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3016/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3017///
3018/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
3019#[inline]
3020#[target_feature(enable = "avx512fp16")]
3021#[cfg_attr(test, assert_instr(vfmulcph))]
3022#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_maskz_mul_pch(k, a, b)
}
3026
3027/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3028/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3029/// Rounding is done according to the rounding parameter, which can be one of:
3030///
3031/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3032/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3033/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3034/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3035/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3036///
3037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
3038#[inline]
3039#[target_feature(enable = "avx512fp16")]
3040#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3041#[rustc_legacy_const_generics(2)]
3042#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mul_round_pch::<ROUNDING>(a, b)
}
3047
3048/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3049/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3050/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3051/// Rounding is done according to the rounding parameter, which can be one of:
3052///
3053/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3054/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3055/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3056/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3057/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3058///
3059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
3060#[inline]
3061#[target_feature(enable = "avx512fp16")]
3062#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3063#[rustc_legacy_const_generics(4)]
3064#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
    src: __m512h,
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b)
}
3074
3075/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3076/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3077/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3078/// Rounding is done according to the rounding parameter, which can be one of:
3079///
3080/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3081/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3082/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3083/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3084/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3085///
3086/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
3087#[inline]
3088#[target_feature(enable = "avx512fp16")]
3089#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3090#[rustc_legacy_const_generics(3)]
3091#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b)
}
3100
3101/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is
3102/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3103/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3104///
3105/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
3106#[inline]
3107#[target_feature(enable = "avx512fp16")]
3108#[cfg_attr(test, assert_instr(vfmulcsh))]
3109#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mul_sch(a, b)
}
3113
3114/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3115/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3116/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3117///
3118/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
3119#[inline]
3120#[target_feature(enable = "avx512fp16")]
3121#[cfg_attr(test, assert_instr(vfmulcsh))]
3122#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(src, k, a, b)
}
3126
3127/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3128/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3129/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3130///
3131/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
3132#[inline]
3133#[target_feature(enable = "avx512fp16")]
3134#[cfg_attr(test, assert_instr(vfmulcsh))]
3135#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_maskz_mul_sch(k, a, b)
}
3139
3140/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed
3141/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3142///
3143/// Rounding is done according to the rounding parameter, which can be one of:
3144///
3145/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3146/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3147/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3148/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3149/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3150///
3151/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
3152#[inline]
3153#[target_feature(enable = "avx512fp16")]
3154#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3155#[rustc_legacy_const_generics(2)]
3156#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mul_round_sch::<ROUNDING>(a, b)
}
3161
3162/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3163/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3164/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3165///
3166/// Rounding is done according to the rounding parameter, which can be one of:
3167///
3168/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3169/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3170/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3171/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3172/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3173///
3174/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
3175#[inline]
3176#[target_feature(enable = "avx512fp16")]
3177#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3178#[rustc_legacy_const_generics(4)]
3179#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b)
}
3189
3190/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3191/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3192/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3193///
3194/// Rounding is done according to the rounding parameter, which can be one of:
3195///
3196/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3197/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3198/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3199/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3200/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3201///
3202/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
3203#[inline]
3204#[target_feature(enable = "avx512fp16")]
3205#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3206#[rustc_legacy_const_generics(3)]
3207#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b)
}
3216
3217/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3218/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3219/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3220/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3221///
3222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
3223#[inline]
3224#[target_feature(enable = "avx512fp16,avx512vl")]
3225#[cfg_attr(test, assert_instr(vfcmulcph))]
3226#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
}
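
// Illustrative reference model (not part of this module): multiplying a by the conjugate
// of b, written with f32 scalars for clarity. With conj(b) = re_b - i*im_b the result is
// (re_a*re_b + im_a*im_b) + i*(im_a*re_b - re_a*im_b).
#[allow(dead_code)]
fn complex_conj_mul_model(a: (f32, f32), b: (f32, f32)) -> (f32, f32) {
    (a.0 * b.0 + a.1 * b.1, a.1 * b.0 - a.0 * b.1)
}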
3230
3231/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3232/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3233/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3234/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3235///
3236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
3237#[inline]
3238#[target_feature(enable = "avx512fp16,avx512vl")]
3239#[cfg_attr(test, assert_instr(vfcmulcph))]
3240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
}
3244
3245/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3246/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3247/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3248/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3249///
3250/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
3251#[inline]
3252#[target_feature(enable = "avx512fp16,avx512vl")]
3253#[cfg_attr(test, assert_instr(vfcmulcph))]
3254#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
}
3258
3259/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3260/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3261/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3262/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3263///
3264/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
3265#[inline]
3266#[target_feature(enable = "avx512fp16,avx512vl")]
3267#[cfg_attr(test, assert_instr(vfcmulcph))]
3268#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
}
3272
3273/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3274/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3275/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3276/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3277///
3278/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
3279#[inline]
3280#[target_feature(enable = "avx512fp16,avx512vl")]
3281#[cfg_attr(test, assert_instr(vfcmulcph))]
3282#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
}
3286
3287/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3288/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3289/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3290/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3291///
3292/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
3293#[inline]
3294#[target_feature(enable = "avx512fp16,avx512vl")]
3295#[cfg_attr(test, assert_instr(vfcmulcph))]
3296#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
}
3300
3301/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3302/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3303/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3304/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3305///
3306/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
3307#[inline]
3308#[target_feature(enable = "avx512fp16")]
3309#[cfg_attr(test, assert_instr(vfcmulcph))]
3310#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
}
3314
3315/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3316/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3317/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3318/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3319///
3320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
3321#[inline]
3322#[target_feature(enable = "avx512fp16")]
3323#[cfg_attr(test, assert_instr(vfcmulcph))]
3324#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
3328
3329/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3330/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3331/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3332/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3333///
3334/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
3335#[inline]
3336#[target_feature(enable = "avx512fp16")]
3337#[cfg_attr(test, assert_instr(vfcmulcph))]
3338#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
}
3342
3343/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3344/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3345/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3346/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3347///
3348/// Rounding is done according to the rounding parameter, which can be one of:
3349///
3350/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3351/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3352/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3353/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3354/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3355///
3356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
3357#[inline]
3358#[target_feature(enable = "avx512fp16")]
3359#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3360#[rustc_legacy_const_generics(2)]
3361#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
}
3366
3367/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3368/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3369/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3370/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3371///
3372/// Rounding is done according to the rounding parameter, which can be one of:
3373///
3374/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3375/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3376/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3377/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3378/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3379///
3380/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
3381#[inline]
3382#[target_feature(enable = "avx512fp16")]
3383#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3384#[rustc_legacy_const_generics(4)]
3385#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
    src: __m512h,
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
    }
}
3403
3404/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3405/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3406/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3407/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3408///
3409/// Rounding is done according to the rounding parameter, which can be one of:
3410///
3411/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3412/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3413/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3414/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3415/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3416///
3417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
3418#[inline]
3419#[target_feature(enable = "avx512fp16")]
3420#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3421#[rustc_legacy_const_generics(3)]
3422#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3423pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
3424 k: __mmask16,
3425 a: __m512h,
3426 b: __m512h,
3427) -> __m512h {
3428 static_assert_rounding!(ROUNDING);
    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
3430}
3431
3432/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3433/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
3436/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
3437#[inline]
3438#[target_feature(enable = "avx512fp16")]
3439#[cfg_attr(test, assert_instr(vfcmulcsh))]
3440#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3441pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_sch(_mm_undefined_ph(), 0xff, a, b)
3443}
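// Sketch of the per-pair arithmetic (hypothetical scalar names, informational
// comment only): with `a = ar + i*ai` and `b = br + i*bi`, the conjugate
// multiply computes `a * conj(b)`, i.e.
//
//     re = ar * br + ai * bi
//     im = ai * br - ar * bi
//
// The packed `_pch` variants apply this to every complex pair; the `_sch`
// variants compute it only for the lowest f16 pair.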
3444
3445/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3446/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3447/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
3450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
3451#[inline]
3452#[target_feature(enable = "avx512fp16")]
3453#[cfg_attr(test, assert_instr(vfcmulcsh))]
3454#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3455pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3456 _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3457}
3458
3459/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3460/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3461/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
3464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
3465#[inline]
3466#[target_feature(enable = "avx512fp16")]
3467#[cfg_attr(test, assert_instr(vfcmulcsh))]
3468#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3469pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_sch(_mm_setzero_ph(), k, a, b)
3471}
3472
3473/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3474/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
3477/// Rounding is done according to the rounding parameter, which can be one of:
3478///
3479/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3480/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3481/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3482/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3484///
3485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
3486#[inline]
3487#[target_feature(enable = "avx512fp16")]
3488#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3489#[rustc_legacy_const_generics(2)]
3490#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3491pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3492 static_assert_rounding!(ROUNDING);
    _mm_mask_cmul_round_sch::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
3494}
3495
3496/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3497/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3498/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3499/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3500///
3501/// Rounding is done according to the rounding parameter, which can be one of:
3502///
3503/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3504/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3505/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3506/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3507/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3508///
3509/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
3510#[inline]
3511#[target_feature(enable = "avx512fp16")]
3512#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3513#[rustc_legacy_const_generics(4)]
3514#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3515pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
3516 src: __m128h,
3517 k: __mmask8,
3518 a: __m128h,
3519 b: __m128h,
3520) -> __m128h {
3521 unsafe {
3522 static_assert_rounding!(ROUNDING);
        transmute(vfcmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
3530 }
3531}
3532
3533/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3534/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3535/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3536/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3537///
3538/// Rounding is done according to the rounding parameter, which can be one of:
3539///
3540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3545///
3546/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
3547#[inline]
3548#[target_feature(enable = "avx512fp16")]
3549#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3550#[rustc_legacy_const_generics(3)]
3551#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3552pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
3553 k: __mmask8,
3554 a: __m128h,
3555 b: __m128h,
3556) -> __m128h {
3557 static_assert_rounding!(ROUNDING);
    _mm_mask_cmul_round_sch::<ROUNDING>(_mm_setzero_ph(), k, a, b)
3559}
3560
3561/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3562/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3563/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3564/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3565///
3566/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
3567#[inline]
3568#[target_feature(enable = "avx512fp16,avx512vl")]
3569#[cfg_attr(test, assert_instr(vfcmulcph))]
3570#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3571pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
3572 _mm_cmul_pch(a, b)
3573}
3574
3575/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3576/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3577/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3578/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3579///
3580/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
3581#[inline]
3582#[target_feature(enable = "avx512fp16,avx512vl")]
3583#[cfg_attr(test, assert_instr(vfcmulcph))]
3584#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3585pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3586 _mm_mask_cmul_pch(src, k, a, b)
3587}
3588
3589/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3590/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3591/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3592/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3593///
3594/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
3595#[inline]
3596#[target_feature(enable = "avx512fp16,avx512vl")]
3597#[cfg_attr(test, assert_instr(vfcmulcph))]
3598#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3599pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3600 _mm_maskz_cmul_pch(k, a, b)
3601}
3602
3603/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3604/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3605/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3606/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3607///
3608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
3609#[inline]
3610#[target_feature(enable = "avx512fp16,avx512vl")]
3611#[cfg_attr(test, assert_instr(vfcmulcph))]
3612#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3613pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
3614 _mm256_cmul_pch(a, b)
3615}
3616
3617/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3618/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3619/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3620/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3621///
3622/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
3623#[inline]
3624#[target_feature(enable = "avx512fp16,avx512vl")]
3625#[cfg_attr(test, assert_instr(vfcmulcph))]
3626#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3627pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3628 _mm256_mask_cmul_pch(src, k, a, b)
3629}
3630
3631/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3632/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3633/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3634/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3635///
3636/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
3637#[inline]
3638#[target_feature(enable = "avx512fp16,avx512vl")]
3639#[cfg_attr(test, assert_instr(vfcmulcph))]
3640#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3641pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3642 _mm256_maskz_cmul_pch(k, a, b)
3643}
3644
3645/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3646/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3647/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3648/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3649///
3650/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
3651#[inline]
3652#[target_feature(enable = "avx512fp16")]
3653#[cfg_attr(test, assert_instr(vfcmulcph))]
3654#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3655pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
3656 _mm512_cmul_pch(a, b)
3657}
3658
3659/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3660/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3661/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3662/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3663///
3664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
3665#[inline]
3666#[target_feature(enable = "avx512fp16")]
3667#[cfg_attr(test, assert_instr(vfcmulcph))]
3668#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3669pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3670 _mm512_mask_cmul_pch(src, k, a, b)
3671}
3672
3673/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3674/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3675/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3676/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3677///
3678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
3679#[inline]
3680#[target_feature(enable = "avx512fp16")]
3681#[cfg_attr(test, assert_instr(vfcmulcph))]
3682#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3683pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3684 _mm512_maskz_cmul_pch(k, a, b)
3685}
3686
3687/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3688/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
3691/// Rounding is done according to the rounding parameter, which can be one of:
3692///
3693/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3694/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3695/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3696/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3697/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3698///
3699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
3700#[inline]
3701#[target_feature(enable = "avx512fp16")]
3702#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3703#[rustc_legacy_const_generics(2)]
3704#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3705pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3706 static_assert_rounding!(ROUNDING);
3707 _mm512_cmul_round_pch::<ROUNDING>(a, b)
3708}
3709
3710/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3711/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3712/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3713/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3714///
3715/// Rounding is done according to the rounding parameter, which can be one of:
3716///
3717/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3718/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3719/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3720/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3721/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3722///
3723/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
3724#[inline]
3725#[target_feature(enable = "avx512fp16")]
3726#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3727#[rustc_legacy_const_generics(4)]
3728#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3729pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
3730 src: __m512h,
3731 k: __mmask16,
3732 a: __m512h,
3733 b: __m512h,
3734) -> __m512h {
3735 static_assert_rounding!(ROUNDING);
3736 _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b)
3737}
3738
3739/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3740/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3741/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3742/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3743///
3744/// Rounding is done according to the rounding parameter, which can be one of:
3745///
3746/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3747/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3748/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3749/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3750/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3751///
3752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
3753#[inline]
3754#[target_feature(enable = "avx512fp16")]
3755#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3756#[rustc_legacy_const_generics(3)]
3757#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3758pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
3759 k: __mmask16,
3760 a: __m512h,
3761 b: __m512h,
3762) -> __m512h {
3763 static_assert_rounding!(ROUNDING);
3764 _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b)
3765}
3766
3767/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3768/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3769/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3770/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3771///
3772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
3773#[inline]
3774#[target_feature(enable = "avx512fp16")]
3775#[cfg_attr(test, assert_instr(vfcmulcsh))]
3776#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3777pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
3778 _mm_cmul_sch(a, b)
3779}
3780
3781/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3782/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3783/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3784/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3785///
3786/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
3787#[inline]
3788#[target_feature(enable = "avx512fp16")]
3789#[cfg_attr(test, assert_instr(vfcmulcsh))]
3790#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3791pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3792 _mm_mask_cmul_sch(src, k, a, b)
3793}
3794
3795/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3796/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3797/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3798/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3799///
3800/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
3801#[inline]
3802#[target_feature(enable = "avx512fp16")]
3803#[cfg_attr(test, assert_instr(vfcmulcsh))]
3804#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3805pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3806 _mm_maskz_cmul_sch(k, a, b)
3807}
3808
3809/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3810/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
3813/// Rounding is done according to the rounding parameter, which can be one of:
3814///
3815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3820///
3821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
3822#[inline]
3823#[target_feature(enable = "avx512fp16")]
3824#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3825#[rustc_legacy_const_generics(2)]
3826#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3827pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3828 static_assert_rounding!(ROUNDING);
3829 _mm_cmul_round_sch::<ROUNDING>(a, b)
3830}
3831
3832/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3833/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3834/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3835/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3836///
3837/// Rounding is done according to the rounding parameter, which can be one of:
3838///
3839/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3840/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3841/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3842/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3844///
3845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
3846#[inline]
3847#[target_feature(enable = "avx512fp16")]
3848#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3849#[rustc_legacy_const_generics(4)]
3850#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3851pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
3852 src: __m128h,
3853 k: __mmask8,
3854 a: __m128h,
3855 b: __m128h,
3856) -> __m128h {
3857 static_assert_rounding!(ROUNDING);
3858 _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b)
3859}
3860
3861/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3862/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3863/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3864/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3865///
3866/// Rounding is done according to the rounding parameter, which can be one of:
3867///
3868/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3869/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3870/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3871/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3872/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3873///
3874/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
3875#[inline]
3876#[target_feature(enable = "avx512fp16")]
3877#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3878#[rustc_legacy_const_generics(3)]
3879#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3880pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
3881 k: __mmask8,
3882 a: __m128h,
3883 b: __m128h,
3884) -> __m128h {
3885 static_assert_rounding!(ROUNDING);
3886 _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
3887}
3888
3889/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3890/// the results in dst.
3891///
3892/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
3893#[inline]
3894#[target_feature(enable = "avx512fp16,avx512vl")]
3895#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3896pub fn _mm_abs_ph(v2: __m128h) -> __m128h {
    unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) }
3898}
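// Sketch of the bit trick used above (informational comment only): an IEEE
// binary16 value keeps its sign in bit 15, so ANDing each 16-bit lane with
// `i16::MAX` (0x7FFF) clears the sign bit and yields the absolute value:
//
//     abs_bits = bits & 0x7FFF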
3899
3900/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3901/// the result in dst.
3902///
3903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
3904#[inline]
3905#[target_feature(enable = "avx512fp16,avx512vl")]
3906#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3907pub fn _mm256_abs_ph(v2: __m256h) -> __m256h {
    unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) }
3909}
3910
3911/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3912/// the result in dst.
3913///
3914/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
3915#[inline]
3916#[target_feature(enable = "avx512fp16")]
3917#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3918pub fn _mm512_abs_ph(v2: __m512h) -> __m512h {
    unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) }
3920}
3921
3922/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex
3923/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines
3924/// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
3925/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3926///
3927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
3928#[inline]
3929#[target_feature(enable = "avx512fp16,avx512vl")]
3930#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3931pub fn _mm_conj_pch(a: __m128h) -> __m128h {
    unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) }
3933}
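// Sketch of the conjugation trick (informational comment only): each complex
// number occupies one 32-bit lane, with the real f16 in bits 15:0 and the
// imaginary f16 in bits 31:16. XORing the lane with `i32::MIN` (0x8000_0000)
// flips only bit 31, i.e. the sign of the imaginary part:
//
//     conj_bits = bits ^ 0x8000_0000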
3934
3935/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
3936/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
3937/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number
3938/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3939///
3940/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
3941#[inline]
3942#[target_feature(enable = "avx512fp16,avx512vl")]
3943#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3944pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
3945 unsafe {
        let r: __m128 = transmute(_mm_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
3948 }
3949}
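// Mask-merge sketch (informational comment only, hypothetical mask value):
// `simd_select_bitmask` picks, per 32-bit complex element, the conjugated value
// when the corresponding bit of `k` is set and the element from `src`
// otherwise. A `__m128h` holds 4 complex elements, so only the low 4 mask bits
// matter; e.g. `k = 0b0101` conjugates elements 0 and 2 and keeps elements
// 1 and 3 from `src`.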
3950
3951/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
3952/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
3953/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3954/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3955///
3956/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
3957#[inline]
3958#[target_feature(enable = "avx512fp16,avx512vl")]
3959#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3960pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_conj_pch(_mm_setzero_ph(), k, a)
3962}
3963
3964/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
3965/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3966/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3967///
3968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
3969#[inline]
3970#[target_feature(enable = "avx512fp16,avx512vl")]
3971#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3972pub fn _mm256_conj_pch(a: __m256h) -> __m256h {
    unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) }
3974}
3975
3976/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
3977/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
3978/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3979/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3980///
3981/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
3982#[inline]
3983#[target_feature(enable = "avx512fp16,avx512vl")]
3984#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3985pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
3986 unsafe {
        let r: __m256 = transmute(_mm256_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
3989 }
3990}
3991
3992/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
3993/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
3994/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3995/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3996///
3997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
3998#[inline]
3999#[target_feature(enable = "avx512fp16,avx512vl")]
4000#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4001pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
    _mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
4003}
4004
4005/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4006/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4007/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4008///
4009/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
4010#[inline]
4011#[target_feature(enable = "avx512fp16")]
4012#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4013pub fn _mm512_conj_pch(a: __m512h) -> __m512h {
    unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) }
4015}
4016
4017/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4018/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4019/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4020/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4021///
4022/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
4023#[inline]
4024#[target_feature(enable = "avx512fp16")]
4025#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4026pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
4027 unsafe {
        let r: __m512 = transmute(_mm512_conj_pch(a));
        transmute(simd_select_bitmask(k, r, transmute(src)))
4030 }
4031}
4032
4033/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4034/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4035/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4036/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4037///
4038/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
4039#[inline]
4040#[target_feature(enable = "avx512fp16")]
4041#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4042pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
    _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
4044}
4045
4046/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4047/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4048/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4049///
4050/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
4051#[inline]
4052#[target_feature(enable = "avx512fp16,avx512vl")]
4053#[cfg_attr(test, assert_instr(vfmaddcph))]
4054#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4055pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask3_fmadd_pch(a, b, c, 0xff)
4057}
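// Sketch of the per-pair arithmetic (hypothetical scalar names, informational
// comment only): with `a = ar + i*ai`, `b = br + i*bi` and accumulator
// `c = cr + i*ci`, the complex fused multiply-add computes `a * b + c`, i.e.
//
//     re = ar * br - ai * bi + cr
//     im = ar * bi + ai * br + ci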
4058
4059/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4060/// and store the results in dst using writemask k (the element is copied from a when the corresponding
4061/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4062/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4063///
4064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
4065#[inline]
4066#[target_feature(enable = "avx512fp16,avx512vl")]
4067#[cfg_attr(test, assert_instr(vfmaddcph))]
4068#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4069pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4070 unsafe {
        let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4073 }
4074}
4075
4076/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4077/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4078/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4079/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4080///
4081/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
4082#[inline]
4083#[target_feature(enable = "avx512fp16,avx512vl")]
4084#[cfg_attr(test, assert_instr(vfmaddcph))]
4085#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4086pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4087 unsafe {
        transmute(vfmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4094 }
4095}
4096
4097/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4098/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4099/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4100/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4101///
4102/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
4103#[inline]
4104#[target_feature(enable = "avx512fp16,avx512vl")]
4105#[cfg_attr(test, assert_instr(vfmaddcph))]
4106#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4107pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4108 unsafe {
        transmute(vfmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4115 }
4116}
4117
4118/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4119/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4120/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4121///
4122/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
4123#[inline]
4124#[target_feature(enable = "avx512fp16,avx512vl")]
4125#[cfg_attr(test, assert_instr(vfmaddcph))]
4126#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4127pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    _mm256_mask3_fmadd_pch(a, b, c, 0xff)
4129}
4130
4131/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4132/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4133/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4134/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4135///
4136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
4137#[inline]
4138#[target_feature(enable = "avx512fp16,avx512vl")]
4139#[cfg_attr(test, assert_instr(vfmaddcph))]
4140#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4141pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4142 unsafe {
        let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4145 }
4146}
4147
4148/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4149/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4150/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4151/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4152///
4153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
4154#[inline]
4155#[target_feature(enable = "avx512fp16,avx512vl")]
4156#[cfg_attr(test, assert_instr(vfmaddcph))]
4157#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4158pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4159 unsafe {
        transmute(vfmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4166 }
4167}
4168
4169/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4170/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4171/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4172/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4173///
4174/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
4175#[inline]
4176#[target_feature(enable = "avx512fp16,avx512vl")]
4177#[cfg_attr(test, assert_instr(vfmaddcph))]
4178#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4179pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4180 unsafe {
        transmute(vfmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
4187 }
4188}
4189
4190/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4191/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4192/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4193///
4194/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
4195#[inline]
4196#[target_feature(enable = "avx512fp16")]
4197#[cfg_attr(test, assert_instr(vfmaddcph))]
4198#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4199pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4200 _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4201}
4202
4203/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4204/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4205/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4206/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4207///
4208/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
4209#[inline]
4210#[target_feature(enable = "avx512fp16")]
4211#[cfg_attr(test, assert_instr(vfmaddcph))]
4212#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4213pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4214 _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4215}
4216
4217/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4218/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4219/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4220/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4221///
4222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
4223#[inline]
4224#[target_feature(enable = "avx512fp16")]
4225#[cfg_attr(test, assert_instr(vfmaddcph))]
4226#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4227pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4228 _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4229}
4230
4231/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4232/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4233/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4234/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4235///
4236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
4237#[inline]
4238#[target_feature(enable = "avx512fp16")]
4239#[cfg_attr(test, assert_instr(vfmaddcph))]
4240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4241pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4242 _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4243}
4244
4245/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4246/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4247/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4248///
4249/// Rounding is done according to the rounding parameter, which can be one of:
4250///
4251/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4252/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4253/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4254/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4256///
4257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
4258#[inline]
4259#[target_feature(enable = "avx512fp16")]
4260#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4261#[rustc_legacy_const_generics(3)]
4262#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4263pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4264 static_assert_rounding!(ROUNDING);
    _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4266}
4267
4268/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4269/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4270/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4271/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4272///
4273/// Rounding is done according to the rounding parameter, which can be one of:
4274///
4275/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4276/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4277/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4278/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4279/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4280///
4281/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
4282#[inline]
4283#[target_feature(enable = "avx512fp16")]
4284#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4285#[rustc_legacy_const_generics(4)]
4286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4287pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
4288 a: __m512h,
4289 k: __mmask16,
4290 b: __m512h,
4291 c: __m512h,
4292) -> __m512h {
4293 unsafe {
4294 static_assert_rounding!(ROUNDING);
        let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
4297 }
4298}
4299
4300/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4301/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4302/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4303/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4304///
4305/// Rounding is done according to the rounding parameter, which can be one of:
4306///
4307/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4308/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4309/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4310/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4311/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4312///
4313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
4314#[inline]
4315#[target_feature(enable = "avx512fp16")]
4316#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4317#[rustc_legacy_const_generics(4)]
4318#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4319pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
4320 a: __m512h,
4321 b: __m512h,
4322 c: __m512h,
4323 k: __mmask16,
4324) -> __m512h {
4325 unsafe {
4326 static_assert_rounding!(ROUNDING);
        transmute(vfmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4334 }
4335}
4336
4337/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4338/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4339/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4340/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4341///
4342/// Rounding is done according to the rounding parameter, which can be one of:
4343///
4344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4349///
4350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
4351#[inline]
4352#[target_feature(enable = "avx512fp16")]
4353#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4354#[rustc_legacy_const_generics(4)]
4355#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4356pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
4357 k: __mmask16,
4358 a: __m512h,
4359 b: __m512h,
4360 c: __m512h,
4361) -> __m512h {
4362 unsafe {
4363 static_assert_rounding!(ROUNDING);
        transmute(vfmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
4371 }
4372}
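// Summary sketch of the merge behaviour across the packed complex FMA variants
// above (informational comment only), for a complex element whose mask bit is
// clear:
//
//     _mm512_mask_fmadd_round_pch   -> element copied from `a`
//     _mm512_mask3_fmadd_round_pch  -> element copied from `c`
//     _mm512_maskz_fmadd_round_pch  -> element zeroed
//
// All three compute the same `a * b + c` for elements whose mask bit is set.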
4373
4374/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4375/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
4376/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
4377/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4378///
4379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
4380#[inline]
4381#[target_feature(enable = "avx512fp16")]
4382#[cfg_attr(test, assert_instr(vfmaddcsh))]
4383#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4384pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4385 _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4386}
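// Usage sketch (hypothetical values, informational comment only): the `_sch`
// form only computes the lowest complex pair, so the upper six f16 elements of
// the result come from the first operand, e.g.
//
//     let acc = _mm_fmadd_sch(a, b, acc); // elements 0..2: a*b + acc (complex); elements 2..8: copied from a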
4387
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using writemask k (elements are copied from a when
/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
}

/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using writemask k (elements are copied from c when
/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
}

/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
}

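// Illustrative sketch (not part of the original source): contrasts the writemasked and
// zeromasked scalar forms above when mask bit 0 is clear. The helper name and values
// are hypothetical.
#[target_feature(enable = "avx512fp16")]
#[allow(dead_code)]
fn _example_masked_fmadd_sch() -> (__m128h, __m128h) {
    let a = _mm_set1_ph(1.0);
    let b = _mm_set1_ph(2.0);
    let c = _mm_set1_ph(3.0);
    // Mask bit 0 is 0: the low complex pair of `merged` is copied from `a` ...
    let merged = _mm_mask_fmadd_sch(a, 0, b, c);
    // ... while the zero-masked form clears it instead.
    let zeroed = _mm_maskz_fmadd_sch(0, a, b, c);
    (merged, zeroed)
}
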
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
    }
}

/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using writemask k (elements are copied from a when
/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
    a: __m128h,
    k: __mmask8,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
        transmute(_mm_mask_move_ss(a, k, a, r))
    }
}

/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using writemask k (elements are copied from c when
/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
    a: __m128h,
    b: __m128h,
    c: __m128h,
    k: __mmask8,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let c: __m128 = transmute(c);
        let r: __m128 = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
    }
}

/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimizes `vfmaddcsh_maskz` to output an all-zero vector, which is incorrect
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask3_fcmadd_pch(a, b, c, 0xff)
}

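// Illustrative sketch (not part of the original source): the conjugate form computes
// a * conj(b) + c per complex lane. With a = 1 + 2i and b = 3 + 4i this yields
// (1 + 2i)(3 - 4i) + c = (11 + 2i) + c. Helper name and values are hypothetical.
#[target_feature(enable = "avx512fp16,avx512vl")]
#[allow(dead_code)]
fn _example_fcmadd_pch() -> __m128h {
    let a = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0); // four copies of 1 + 2i
    let b = _mm_set_ph(4.0, 3.0, 4.0, 3.0, 4.0, 3.0, 4.0, 3.0); // four copies of 3 + 4i
    let c = _mm_setzero_ph();
    _mm_fcmadd_pch(a, b, c) // every complex lane holds 11 + 2i
}
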
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe {
        transmute(vfcmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        transmute(vfcmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
    unsafe {
        let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
    unsafe {
        transmute(vfcmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe {
        transmute(vfcmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
    _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
    _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
}

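// Illustrative sketch (not part of the original source): the rounded variant is useful
// when the computation must ignore MXCSR, e.g. truncating regardless of the current
// rounding mode. The helper name is hypothetical.
#[target_feature(enable = "avx512fp16")]
#[allow(dead_code)]
fn _example_fcmadd_round_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // a * conj(b) + c for all sixteen complex lanes, truncating and suppressing exceptions.
    _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}
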
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
    a: __m512h,
    k: __mmask16,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
    k: __mmask16,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
    }
}

/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
    }
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}

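// Illustrative sketch (not part of the original source): scalar counterpart of the
// conjugate multiply-add, which touches only the lowest complex pair and copies the
// upper six fp16 lanes from `a`. Helper name and values are hypothetical.
#[target_feature(enable = "avx512fp16")]
#[allow(dead_code)]
fn _example_fcmadd_sch() -> __m128h {
    let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0); // 1 + 2i in the low pair
    let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0); // 3 + 4i
    let c = _mm_set_sh(1.0); // 1 + 0i
    _mm_fcmadd_sch(a, b, c) // low pair holds (1 + 2i)(3 - 4i) + 1 = 12 + 2i
}
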
/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
    }
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
    a: __m128h,
    k: __mmask8,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_mask_move_ss(a, k, a, r))
    }
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
    a: __m128h,
    b: __m128h,
    c: __m128h,
    k: __mmask8,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let c: __m128 = transmute(c);
        let r: __m128 = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
    }
}

/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimizes `vfcmaddcsh_maskz` to output an all-zero vector, which is incorrect
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(a, b, c) }
}

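// Illustrative sketch (not part of the original source): the packed form is a plain
// element-wise fused multiply-add over eight fp16 lanes, i.e. a[i] * b[i] + c[i] with a
// single rounding per lane. Helper name and values are hypothetical.
#[target_feature(enable = "avx512fp16,avx512vl")]
#[allow(dead_code)]
fn _example_fmadd_ph() -> __m128h {
    let a = _mm_set1_ph(2.0);
    let b = _mm_set1_ph(3.0);
    let c = _mm_set1_ph(1.0);
    _mm_fmadd_ph(a, b, c) // every lane holds 2.0 * 3.0 + 1.0 = 7.0
}
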
/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) }
}

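// Illustrative sketch (not part of the original source): the masked forms above differ
// only in where unselected lanes come from (`a`, `c`, or zero). Helper name is
// hypothetical.
#[target_feature(enable = "avx512fp16,avx512vl")]
#[allow(dead_code)]
fn _example_masked_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Compute a * b + c in the low four lanes and zero the high four lanes.
    _mm_maskz_fmadd_ph(0b0000_1111, a, b, c)
}
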
/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(a, b, c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(a, b, c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddph_512(a, b, c, ROUNDING)
    }
}

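// Illustrative sketch (not part of the original source): forcing round-toward-negative-
// infinity for the whole 512-bit FMA, independent of MXCSR. Helper name is hypothetical.
#[target_feature(enable = "avx512fp16")]
#[allow(dead_code)]
fn _example_fmadd_round_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b, c)
}
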
5288/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5289/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5290/// from a when the corresponding mask bit is not set).
5291///
5292/// Rounding is done according to the rounding parameter, which can be one of:
5293///
5294/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5295/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5296/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5297/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5299///
5300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
5301#[inline]
5302#[target_feature(enable = "avx512fp16")]
5303#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5304#[rustc_legacy_const_generics(4)]
5305#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5306pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
5307 a: __m512h,
5308 k: __mmask32,
5309 b: __m512h,
5310 c: __m512h,
5311) -> __m512h {
5312 unsafe {
5313 static_assert_rounding!(ROUNDING);
5314 simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:a)
5315 }
5316}
5317
5318/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5319/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5320/// from c when the corresponding mask bit is not set).
5321///
5322/// Rounding is done according to the rounding parameter, which can be one of:
5323///
5324/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5325/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5326/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5327/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5328/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5329///
5330/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
5331#[inline]
5332#[target_feature(enable = "avx512fp16")]
5333#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5334#[rustc_legacy_const_generics(4)]
5335#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5336pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
5337 a: __m512h,
5338 b: __m512h,
5339 c: __m512h,
5340 k: __mmask32,
5341) -> __m512h {
5342 unsafe {
5343 static_assert_rounding!(ROUNDING);
5344 simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:c)
5345 }
5346}
5347
5348/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5349/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5350/// out when the corresponding mask bit is not set).
5351///
5352/// Rounding is done according to the rounding parameter, which can be one of:
5353///
5354/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5355/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5356/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5357/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5358/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5359///
5360/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
5361#[inline]
5362#[target_feature(enable = "avx512fp16")]
5363#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5364#[rustc_legacy_const_generics(4)]
5365#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5366pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
5367 k: __mmask32,
5368 a: __m512h,
5369 b: __m512h,
5370 c: __m512h,
5371) -> __m512h {
5372 unsafe {
5373 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
5379 }
5380}
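
// --- Illustrative usage sketch (not part of the upstream API; the helper name is
// hypothetical). Assumes a nightly toolchain with the unstable `f16` and
// `stdarch_x86_avx512_f16` features and a CPU supporting AVX512-FP16. Shows how a
// compile-time rounding mode and a zeromask combine: lanes whose mask bit is set get
// `a * b + c` rounded toward zero with exceptions suppressed, all other lanes become 0.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn maskz_fmadd_round_sketch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // Keep only the lower 16 of the 32 half-precision lanes.
    _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0x0000_FFFF, a, b, c)
}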
5381
5382/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5383/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5384/// 7 packed elements from a to the upper elements of dst.
5385///
5386/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
5387#[inline]
5388#[target_feature(enable = "avx512fp16")]
5389#[cfg_attr(test, assert_instr(vfmadd))]
5390#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5391pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5392 unsafe {
5393 let extracta: f16 = simd_extract!(a, 0);
5394 let extractb: f16 = simd_extract!(b, 0);
5395 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(extracta, extractb, extractc);
5397 simd_insert!(a, 0, r)
5398 }
5399}
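
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API).
// Assumes the same nightly features as the intrinsics above. Only lane 0 is computed;
// lanes 1..7 of the result are copied from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmadd_sh_sketch() -> __m128h {
    let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5); // lane 0 holds 0.5
    let b = _mm_set1_ph(2.0);
    let c = _mm_set1_ph(1.0);
    // Lane 0 of the result is 0.5 * 2.0 + 1.0 = 2.0; lanes 1..7 stay 1.0..7.0 from `a`.
    _mm_fmadd_sh(a, b, c)
}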
5400
5401/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5402/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5403/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5404/// upper elements of dst.
5405///
5406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
5407#[inline]
5408#[target_feature(enable = "avx512fp16")]
5409#[cfg_attr(test, assert_instr(vfmadd))]
5410#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5411pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5412 unsafe {
5413 let mut fmadd: f16 = simd_extract!(a, 0);
5414 if k & 1 != 0 {
5415 let extractb: f16 = simd_extract!(b, 0);
5416 let extractc: f16 = simd_extract!(c, 0);
            fmadd = fmaf16(fmadd, extractb, extractc);
5418 }
5419 simd_insert!(a, 0, fmadd)
5420 }
5421}
5422
5423/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5424/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5425/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5426/// upper elements of dst.
5427///
5428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
5429#[inline]
5430#[target_feature(enable = "avx512fp16")]
5431#[cfg_attr(test, assert_instr(vfmadd))]
5432#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5433pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5434 unsafe {
5435 let mut fmadd: f16 = simd_extract!(c, 0);
5436 if k & 1 != 0 {
5437 let extracta: f16 = simd_extract!(a, 0);
5438 let extractb: f16 = simd_extract!(b, 0);
            fmadd = fmaf16(extracta, extractb, fmadd);
5440 }
5441 simd_insert!(c, 0, fmadd)
5442 }
5443}
5444
5445/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5446/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5447/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5448/// upper elements of dst.
5449///
5450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
5451#[inline]
5452#[target_feature(enable = "avx512fp16")]
5453#[cfg_attr(test, assert_instr(vfmadd))]
5454#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5455pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5456 unsafe {
5457 let mut fmadd: f16 = 0.0;
5458 if k & 1 != 0 {
5459 let extracta: f16 = simd_extract!(a, 0);
5460 let extractb: f16 = simd_extract!(b, 0);
5461 let extractc: f16 = simd_extract!(c, 0);
            fmadd = fmaf16(extracta, extractb, extractc);
5463 }
5464 simd_insert!(a, 0, fmadd)
5465 }
5466}
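
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features. Contrasts the write- and zero-masked scalar forms:
// with mask bit 0 clear, the writemask form keeps lane 0 of `a`, while the zeromask form
// forces lane 0 to 0.0; both copy lanes 1..7 from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn scalar_fmadd_mask_sketch(a: __m128h, b: __m128h, c: __m128h) -> (__m128h, __m128h) {
    let kept = _mm_mask_fmadd_sh(a, 0b0, b, c); // lane 0 passes through from `a`
    let zeroed = _mm_maskz_fmadd_sh(0b0, a, b, c); // lane 0 becomes 0.0
    (kept, zeroed)
}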
5467
5468/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5469/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5470/// 7 packed elements from a to the upper elements of dst.
5471///
5472/// Rounding is done according to the rounding parameter, which can be one of:
5473///
5474/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5475/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5476/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5477/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5478/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5479///
5480/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
5481#[inline]
5482#[target_feature(enable = "avx512fp16")]
5483#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5484#[rustc_legacy_const_generics(3)]
5485#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5486pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5487 unsafe {
5488 static_assert_rounding!(ROUNDING);
5489 let extracta: f16 = simd_extract!(a, 0);
5490 let extractb: f16 = simd_extract!(b, 0);
5491 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5493 simd_insert!(a, 0, r)
5494 }
5495}
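
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features. The rounding mode is a const generic, so it must be
// spelled at the call site; `_MM_FROUND_NO_EXC` is OR-ed in to suppress exceptions.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmadd_round_sh_sketch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Round the lower-lane result toward negative infinity.
    _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b, c)
}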
5496
5497/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5498/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5499/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5500/// upper elements of dst.
5501///
5502/// Rounding is done according to the rounding parameter, which can be one of:
5503///
5504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5509///
5510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
5511#[inline]
5512#[target_feature(enable = "avx512fp16")]
5513#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5514#[rustc_legacy_const_generics(4)]
5515#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5516pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
5517 a: __m128h,
5518 k: __mmask8,
5519 b: __m128h,
5520 c: __m128h,
5521) -> __m128h {
5522 unsafe {
5523 static_assert_rounding!(ROUNDING);
5524 let mut fmadd: f16 = simd_extract!(a, 0);
5525 if k & 1 != 0 {
5526 let extractb: f16 = simd_extract!(b, 0);
5527 let extractc: f16 = simd_extract!(c, 0);
            fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
5529 }
5530 simd_insert!(a, 0, fmadd)
5531 }
5532}
5533
5534/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5535/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5536/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5537/// upper elements of dst.
5538///
5539/// Rounding is done according to the rounding parameter, which can be one of:
5540///
5541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5546///
5547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
5548#[inline]
5549#[target_feature(enable = "avx512fp16")]
5550#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5551#[rustc_legacy_const_generics(4)]
5552#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5553pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
5554 a: __m128h,
5555 b: __m128h,
5556 c: __m128h,
5557 k: __mmask8,
5558) -> __m128h {
5559 unsafe {
5560 static_assert_rounding!(ROUNDING);
5561 let mut fmadd: f16 = simd_extract!(c, 0);
5562 if k & 1 != 0 {
5563 let extracta: f16 = simd_extract!(a, 0);
5564 let extractb: f16 = simd_extract!(b, 0);
            fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
5566 }
5567 simd_insert!(c, 0, fmadd)
5568 }
5569}
5570
5571/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5572/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5573/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5574/// upper elements of dst.
5575///
5576/// Rounding is done according to the rounding parameter, which can be one of:
5577///
5578/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5579/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5580/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5581/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5582/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5583///
5584/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
5585#[inline]
5586#[target_feature(enable = "avx512fp16")]
5587#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5588#[rustc_legacy_const_generics(4)]
5589#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5590pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
5591 k: __mmask8,
5592 a: __m128h,
5593 b: __m128h,
5594 c: __m128h,
5595) -> __m128h {
5596 unsafe {
5597 static_assert_rounding!(ROUNDING);
5598 let mut fmadd: f16 = 0.0;
5599 if k & 1 != 0 {
5600 let extracta: f16 = simd_extract!(a, 0);
5601 let extractb: f16 = simd_extract!(b, 0);
5602 let extractc: f16 = simd_extract!(c, 0);
            fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
5604 }
5605 simd_insert!(a, 0, fmadd)
5606 }
5607}
5608
5609/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5610/// in c from the intermediate result, and store the results in dst.
5612///
5613/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
5614#[inline]
5615#[target_feature(enable = "avx512fp16,avx512vl")]
5616#[cfg_attr(test, assert_instr(vfmsub))]
5617#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5618pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(a, b, simd_neg(c)) }
5620}
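
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features plus AVX512VL. Every lane computes a * b - c as a
// single fused operation, i.e. with one rounding step.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn fmsub_ph_sketch() -> __m128h {
    let a = _mm_set1_ph(3.0);
    let b = _mm_set1_ph(2.0);
    let c = _mm_set1_ph(1.0);
    // All eight lanes become 3.0 * 2.0 - 1.0 = 5.0.
    _mm_fmsub_ph(a, b, c)
}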
5621
5622/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5623/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5624/// from a when the corresponding mask bit is not set).
5625///
5626/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
5627#[inline]
5628#[target_feature(enable = "avx512fp16,avx512vl")]
5629#[cfg_attr(test, assert_instr(vfmsub))]
5630#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5631pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) }
5633}
5634
5635/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5636/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5637/// from c when the corresponding mask bit is not set).
5638///
5639/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
5640#[inline]
5641#[target_feature(enable = "avx512fp16,avx512vl")]
5642#[cfg_attr(test, assert_instr(vfmsub))]
5643#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5644pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) }
5646}
5647
5648/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5649/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5650/// out when the corresponding mask bit is not set).
5651///
5652/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
5653#[inline]
5654#[target_feature(enable = "avx512fp16,avx512vl")]
5655#[cfg_attr(test, assert_instr(vfmsub))]
5656#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5657pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) }
5659}
5660
5661/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5662/// in c from the intermediate result, and store the results in dst.
5663///
5664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
5665#[inline]
5666#[target_feature(enable = "avx512fp16,avx512vl")]
5667#[cfg_attr(test, assert_instr(vfmsub))]
5668#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5669pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(a, b, simd_neg(c)) }
5671}
5672
5673/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5674/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5675/// from a when the corresponding mask bit is not set).
5676///
5677/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
5678#[inline]
5679#[target_feature(enable = "avx512fp16,avx512vl")]
5680#[cfg_attr(test, assert_instr(vfmsub))]
5681#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5682pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) }
5684}
5685
5686/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5687/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5688/// from c when the corresponding mask bit is not set).
5689///
5690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
5691#[inline]
5692#[target_feature(enable = "avx512fp16,avx512vl")]
5693#[cfg_attr(test, assert_instr(vfmsub))]
5694#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5695pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) }
5697}
5698
5699/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5700/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5701/// out when the corresponding mask bit is not set).
5702///
5703/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
5704#[inline]
5705#[target_feature(enable = "avx512fp16,avx512vl")]
5706#[cfg_attr(test, assert_instr(vfmsub))]
5707#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5708pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) }
5710}
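
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features plus AVX512VL. The three masked forms differ only in
// what an unselected lane receives: the value from `a`, the value from `c`, or zero.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn fmsub_ph_mask_forms_sketch(
    k: __mmask16,
    a: __m256h,
    b: __m256h,
    c: __m256h,
) -> (__m256h, __m256h, __m256h) {
    let from_a = _mm256_mask_fmsub_ph(a, k, b, c); // unselected lanes keep `a`
    let from_c = _mm256_mask3_fmsub_ph(a, b, c, k); // unselected lanes keep `c`
    let zeroed = _mm256_maskz_fmsub_ph(k, a, b, c); // unselected lanes are zeroed
    (from_a, from_c, zeroed)
}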
5711
5712/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5713/// in c from the intermediate result, and store the results in dst.
5714///
5715/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
5716#[inline]
5717#[target_feature(enable = "avx512fp16")]
5718#[cfg_attr(test, assert_instr(vfmsub))]
5719#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5720pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(a, b, simd_neg(c)) }
5722}
5723
5724/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5725/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5726/// from a when the corresponding mask bit is not set).
5727///
5728/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
5729#[inline]
5730#[target_feature(enable = "avx512fp16")]
5731#[cfg_attr(test, assert_instr(vfmsub))]
5732#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5733pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) }
5735}
5736
5737/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5738/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5739/// from c when the corresponding mask bit is not set).
5740///
5741/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
5742#[inline]
5743#[target_feature(enable = "avx512fp16")]
5744#[cfg_attr(test, assert_instr(vfmsub))]
5745#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5746pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) }
5748}
5749
5750/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5751/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5752/// out when the corresponding mask bit is not set).
5753///
5754/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
5755#[inline]
5756#[target_feature(enable = "avx512fp16")]
5757#[cfg_attr(test, assert_instr(vfmsub))]
5758#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5759pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) }
5761}
5762
5763/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5764/// in c from the intermediate result, and store the results in dst.
5765///
5766/// Rounding is done according to the rounding parameter, which can be one of:
5767///
5768/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5769/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5770/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5771/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5772/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5773///
5774/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
5775#[inline]
5776#[target_feature(enable = "avx512fp16")]
5777#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5778#[rustc_legacy_const_generics(3)]
5779#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5780pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5781 unsafe {
5782 static_assert_rounding!(ROUNDING);
        vfmaddph_512(a, b, simd_neg(c), ROUNDING)
5784 }
5785}
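
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features. Passing `_MM_FROUND_CUR_DIRECTION` defers to the
// rounding mode currently set in `MXCSR.RC` instead of a statically chosen one.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmsub_round_ph_sketch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmsub_round_ph::<{ _MM_FROUND_CUR_DIRECTION }>(a, b, c)
}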
5786
5787/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5788/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5789/// from a when the corresponding mask bit is not set).
5790///
5791/// Rounding is done according to the rounding parameter, which can be one of:
5792///
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5794/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5795/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5796/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
5800#[inline]
5801#[target_feature(enable = "avx512fp16")]
5802#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5803#[rustc_legacy_const_generics(4)]
5804#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5805pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
5806 a: __m512h,
5807 k: __mmask32,
5808 b: __m512h,
5809 c: __m512h,
5810) -> __m512h {
5811 unsafe {
5812 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
5814 }
5815}
5816
5817/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5818/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5819/// from c when the corresponding mask bit is not set).
5820///
5821/// Rounding is done according to the rounding parameter, which can be one of:
5822///
5823/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5824/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5825/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5826/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5827/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5828///
5829/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
5830#[inline]
5831#[target_feature(enable = "avx512fp16")]
5832#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5833#[rustc_legacy_const_generics(4)]
5834#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5835pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
5836 a: __m512h,
5837 b: __m512h,
5838 c: __m512h,
5839 k: __mmask32,
5840) -> __m512h {
5841 unsafe {
5842 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
5844 }
5845}
5846
5847/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5848/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5849/// out when the corresponding mask bit is not set).
5850///
5851/// Rounding is done according to the rounding parameter, which can be one of:
5852///
5853/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5854/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5855/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5856/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5857/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5858///
5859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
5860#[inline]
5861#[target_feature(enable = "avx512fp16")]
5862#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5863#[rustc_legacy_const_generics(4)]
5864#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5865pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
5866 k: __mmask32,
5867 a: __m512h,
5868 b: __m512h,
5869 c: __m512h,
5870) -> __m512h {
5871 unsafe {
5872 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
5878 }
5879}
5880
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
5882/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5883/// 7 packed elements from a to the upper elements of dst.
5884///
5885/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
5886#[inline]
5887#[target_feature(enable = "avx512fp16")]
5888#[cfg_attr(test, assert_instr(vfmsub))]
5889#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5890pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5891 unsafe {
5892 let extracta: f16 = simd_extract!(a, 0);
5893 let extractb: f16 = simd_extract!(b, 0);
5894 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(extracta, extractb, -extractc);
5896 simd_insert!(a, 0, r)
5897 }
5898}
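
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features. Only the lower lane is computed as a * b - c; the
// upper seven lanes are copied from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fmsub_sh_sketch(a: __m128h, b: __m128h) -> __m128h {
    let c = _mm_set_sh(4.0); // only lane 0 of `c` participates
    _mm_fmsub_sh(a, b, c)
}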
5899
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
5901/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5902/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5903/// upper elements of dst.
5904///
5905/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
5906#[inline]
5907#[target_feature(enable = "avx512fp16")]
5908#[cfg_attr(test, assert_instr(vfmsub))]
5909#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5910pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5911 unsafe {
5912 let mut fmsub: f16 = simd_extract!(a, 0);
5913 if k & 1 != 0 {
5914 let extractb: f16 = simd_extract!(b, 0);
5915 let extractc: f16 = simd_extract!(c, 0);
            fmsub = fmaf16(fmsub, extractb, -extractc);
5917 }
5918 simd_insert!(a, 0, fmsub)
5919 }
5920}
5921
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
5923/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5924/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5925/// upper elements of dst.
5926///
5927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
5928#[inline]
5929#[target_feature(enable = "avx512fp16")]
5930#[cfg_attr(test, assert_instr(vfmsub))]
5931#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5932pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5933 unsafe {
5934 let mut fmsub: f16 = simd_extract!(c, 0);
5935 if k & 1 != 0 {
5936 let extracta: f16 = simd_extract!(a, 0);
5937 let extractb: f16 = simd_extract!(b, 0);
            fmsub = fmaf16(extracta, extractb, -fmsub);
5939 }
5940 simd_insert!(c, 0, fmsub)
5941 }
5942}
5943
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
5945/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
5946/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5947/// upper elements of dst.
5948///
5949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
5950#[inline]
5951#[target_feature(enable = "avx512fp16")]
5952#[cfg_attr(test, assert_instr(vfmsub))]
5953#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5954pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5955 unsafe {
5956 let mut fmsub: f16 = 0.0;
5957 if k & 1 != 0 {
5958 let extracta: f16 = simd_extract!(a, 0);
5959 let extractb: f16 = simd_extract!(b, 0);
5960 let extractc: f16 = simd_extract!(c, 0);
            fmsub = fmaf16(extracta, extractb, -extractc);
5962 }
5963 simd_insert!(a, 0, fmsub)
5964 }
5965}
5966
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
5968/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5969/// 7 packed elements from a to the upper elements of dst.
5970///
5971/// Rounding is done according to the rounding parameter, which can be one of:
5972///
5973/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5974/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5975/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5976/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5977/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5978///
5979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
5980#[inline]
5981#[target_feature(enable = "avx512fp16")]
5982#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5983#[rustc_legacy_const_generics(3)]
5984#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5985pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5986 unsafe {
5987 static_assert_rounding!(ROUNDING);
5988 let extracta: f16 = simd_extract!(a, 0);
5989 let extractb: f16 = simd_extract!(b, 0);
5990 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
5992 simd_insert!(a, 0, r)
5993 }
5994}
5995
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
5997/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5998/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5999/// upper elements of dst.
6000///
6001/// Rounding is done according to the rounding parameter, which can be one of:
6002///
6003/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6004/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6005/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6006/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6007/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6008///
6009/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
6010#[inline]
6011#[target_feature(enable = "avx512fp16")]
6012#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6013#[rustc_legacy_const_generics(4)]
6014#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6015pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
6016 a: __m128h,
6017 k: __mmask8,
6018 b: __m128h,
6019 c: __m128h,
6020) -> __m128h {
6021 unsafe {
6022 static_assert_rounding!(ROUNDING);
6023 let mut fmsub: f16 = simd_extract!(a, 0);
6024 if k & 1 != 0 {
6025 let extractb: f16 = simd_extract!(b, 0);
6026 let extractc: f16 = simd_extract!(c, 0);
            fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING);
6028 }
6029 simd_insert!(a, 0, fmsub)
6030 }
6031}
6032
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
6034/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6035/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
6036/// upper elements of dst.
6037///
6038/// Rounding is done according to the rounding parameter, which can be one of:
6039///
6040/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6041/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6042/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6043/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6044/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6045///
6046/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
6047#[inline]
6048#[target_feature(enable = "avx512fp16")]
6049#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6050#[rustc_legacy_const_generics(4)]
6051#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6052pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
6053 a: __m128h,
6054 b: __m128h,
6055 c: __m128h,
6056 k: __mmask8,
6057) -> __m128h {
6058 unsafe {
6059 static_assert_rounding!(ROUNDING);
6060 let mut fmsub: f16 = simd_extract!(c, 0);
6061 if k & 1 != 0 {
6062 let extracta: f16 = simd_extract!(a, 0);
6063 let extractb: f16 = simd_extract!(b, 0);
            fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING);
6065 }
6066 simd_insert!(c, 0, fmsub)
6067 }
6068}
6069
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
6075/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
6076#[inline]
6077#[target_feature(enable = "avx512fp16")]
6078#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6079#[rustc_legacy_const_generics(4)]
6080#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6081pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
6082 k: __mmask8,
6083 a: __m128h,
6084 b: __m128h,
6085 c: __m128h,
6086) -> __m128h {
6087 unsafe {
6088 static_assert_rounding!(ROUNDING);
6089 let mut fmsub: f16 = 0.0;
6090 if k & 1 != 0 {
6091 let extracta: f16 = simd_extract!(a, 0);
6092 let extractb: f16 = simd_extract!(b, 0);
6093 let extractc: f16 = simd_extract!(c, 0);
            fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
6095 }
6096 simd_insert!(a, 0, fmsub)
6097 }
6098}
6099
6100/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6101/// result from packed elements in c, and store the results in dst.
6102///
6103/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
6104#[inline]
6105#[target_feature(enable = "avx512fp16,avx512vl")]
6106#[cfg_attr(test, assert_instr(vfnmadd))]
6107#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6108pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(simd_neg(a), b, c) }
6110}
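
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features plus AVX512VL. FNMADD negates the product, so each
// lane computes c - a * b.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn fnmadd_ph_sketch() -> __m128h {
    let a = _mm_set1_ph(3.0);
    let b = _mm_set1_ph(2.0);
    let c = _mm_set1_ph(10.0);
    // All eight lanes become -(3.0 * 2.0) + 10.0 = 4.0.
    _mm_fnmadd_ph(a, b, c)
}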
6111
6112/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6113/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6114/// from a when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
6117#[inline]
6118#[target_feature(enable = "avx512fp16,avx512vl")]
6119#[cfg_attr(test, assert_instr(vfnmadd))]
6120#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6121pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) }
6123}
6124
6125/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6126/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6127/// from c when the corresponding mask bit is not set).
6128///
6129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
6130#[inline]
6131#[target_feature(enable = "avx512fp16,avx512vl")]
6132#[cfg_attr(test, assert_instr(vfnmadd))]
6133#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6134pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
6136}
6137
6138/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6139/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6140/// out when the corresponding mask bit is not set).
6141///
6142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
6143#[inline]
6144#[target_feature(enable = "avx512fp16,avx512vl")]
6145#[cfg_attr(test, assert_instr(vfnmadd))]
6146#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6147pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
6149}
6150
6151/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6152/// result from packed elements in c, and store the results in dst.
6153///
6154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
6155#[inline]
6156#[target_feature(enable = "avx512fp16,avx512vl")]
6157#[cfg_attr(test, assert_instr(vfnmadd))]
6158#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6159pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(simd_neg(a), b, c) }
6161}
6162
6163/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6164/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6165/// from a when the corresponding mask bit is not set).
6166///
6167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
6168#[inline]
6169#[target_feature(enable = "avx512fp16,avx512vl")]
6170#[cfg_attr(test, assert_instr(vfnmadd))]
6171#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6172pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
6174}
6175
6176/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6177/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6178/// from c when the corresponding mask bit is not set).
6179///
6180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
6181#[inline]
6182#[target_feature(enable = "avx512fp16,avx512vl")]
6183#[cfg_attr(test, assert_instr(vfnmadd))]
6184#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6185pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
6187}
6188
6189/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6190/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6191/// out when the corresponding mask bit is not set).
6192///
6193/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
6194#[inline]
6195#[target_feature(enable = "avx512fp16,avx512vl")]
6196#[cfg_attr(test, assert_instr(vfnmadd))]
6197#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6198pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
6200}
6201
6202/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6203/// result from packed elements in c, and store the results in dst.
6204///
6205/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
6206#[inline]
6207#[target_feature(enable = "avx512fp16")]
6208#[cfg_attr(test, assert_instr(vfnmadd))]
6209#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6210pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(simd_neg(a), b, c) }
6212}
6213
6214/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6215/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6216/// from a when the corresponding mask bit is not set).
6217///
6218/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
6219#[inline]
6220#[target_feature(enable = "avx512fp16")]
6221#[cfg_attr(test, assert_instr(vfnmadd))]
6222#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6223pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
6225}
6226
6227/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6228/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6229/// from c when the corresponding mask bit is not set).
6230///
6231/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
6232#[inline]
6233#[target_feature(enable = "avx512fp16")]
6234#[cfg_attr(test, assert_instr(vfnmadd))]
6235#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6236pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
6238}
6239
6240/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6241/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6242/// out when the corresponding mask bit is not set).
6243///
6244/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
6245#[inline]
6246#[target_feature(enable = "avx512fp16")]
6247#[cfg_attr(test, assert_instr(vfnmadd))]
6248#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6249pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
6251}
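
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features. With every one of the 32 mask bits set, the
// writemask form behaves exactly like the unmasked intrinsic.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fnmadd_ph_full_mask_sketch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_mask_fnmadd_ph(a, u32::MAX, b, c)
}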
6252
6253/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6254/// result from packed elements in c, and store the results in dst.
6255///
6256/// Rounding is done according to the rounding parameter, which can be one of:
6257///
6258/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6259/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6260/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6261/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6263///
6264/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
6265#[inline]
6266#[target_feature(enable = "avx512fp16")]
6267#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6268#[rustc_legacy_const_generics(3)]
6269#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6270pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6271 unsafe {
6272 static_assert_rounding!(ROUNDING);
        vfmaddph_512(simd_neg(a), b, c, ROUNDING)
6274 }
6275}
6276
6277/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6278/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6279/// from a when the corresponding mask bit is not set).
6280///
6281/// Rounding is done according to the rounding parameter, which can be one of:
6282///
6283/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6284/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6285/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6286/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6288///
6289/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
6290#[inline]
6291#[target_feature(enable = "avx512fp16")]
6292#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6293#[rustc_legacy_const_generics(4)]
6294#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6295pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
6296 a: __m512h,
6297 k: __mmask32,
6298 b: __m512h,
6299 c: __m512h,
6300) -> __m512h {
6301 unsafe {
6302 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
6304 }
6305}
6306
6307/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6308/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6309/// from c when the corresponding mask bit is not set).
6310///
6311/// Rounding is done according to the rounding parameter, which can be one of:
6312///
6313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6318///
6319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
6320#[inline]
6321#[target_feature(enable = "avx512fp16")]
6322#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6323#[rustc_legacy_const_generics(4)]
6324#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6325pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
6326 a: __m512h,
6327 b: __m512h,
6328 c: __m512h,
6329 k: __mmask32,
6330) -> __m512h {
6331 unsafe {
6332 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
6334 }
6335}
6336
6337/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6338/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6339/// out when the corresponding mask bit is not set).
6340///
6341/// Rounding is done according to the rounding parameter, which can be one of:
6342///
6343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6348///
6349/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
6350#[inline]
6351#[target_feature(enable = "avx512fp16")]
6352#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6353#[rustc_legacy_const_generics(4)]
6354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6355pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
6356 k: __mmask32,
6357 a: __m512h,
6358 b: __m512h,
6359 c: __m512h,
6360) -> __m512h {
6361 unsafe {
6362 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
6368 }
6369}
6370
6371/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6372/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6373/// elements from a to the upper elements of dst.
6374///
6375/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
6376#[inline]
6377#[target_feature(enable = "avx512fp16")]
6378#[cfg_attr(test, assert_instr(vfnmadd))]
6379#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6380pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6381 unsafe {
6382 let extracta: f16 = simd_extract!(a, 0);
6383 let extractb: f16 = simd_extract!(b, 0);
6384 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(-extracta, extractb, extractc);
6386 simd_insert!(a, 0, r)
6387 }
6388}
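
// --- Illustrative usage sketch (hypothetical helper, not part of the upstream API),
// assuming the same nightly features. Lane 0 computes -(a0 * b0) + c0; lanes 1..7 are
// copied from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fnmadd_sh_sketch() -> __m128h {
    let a = _mm_set_sh(3.0);
    let b = _mm_set_sh(2.0);
    let c = _mm_set_sh(1.0);
    // Lane 0 becomes -(3.0 * 2.0) + 1.0 = -5.0; the upper lanes stay 0.0 (copied from `a`).
    _mm_fnmadd_sh(a, b, c)
}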
6389
6390/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6391/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6392/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6393/// elements of dst.
6394///
6395/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
6396#[inline]
6397#[target_feature(enable = "avx512fp16")]
6398#[cfg_attr(test, assert_instr(vfnmadd))]
6399#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6400pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6401 unsafe {
6402 let mut fnmadd: f16 = simd_extract!(a, 0);
6403 if k & 1 != 0 {
6404 let extractb: f16 = simd_extract!(b, 0);
6405 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = fmaf16(-fnmadd, extractb, extractc);
6407 }
6408 simd_insert!(a, 0, fnmadd)
6409 }
6410}
6411
6412/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6413/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6414/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6415/// elements of dst.
6416///
6417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
6418#[inline]
6419#[target_feature(enable = "avx512fp16")]
6420#[cfg_attr(test, assert_instr(vfnmadd))]
6421#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6422pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6423 unsafe {
6424 let mut fnmadd: f16 = simd_extract!(c, 0);
6425 if k & 1 != 0 {
6426 let extracta: f16 = simd_extract!(a, 0);
6427 let extractb: f16 = simd_extract!(b, 0);
            fnmadd = fmaf16(-extracta, extractb, fnmadd);
6429 }
6430 simd_insert!(c, 0, fnmadd)
6431 }
6432}
6433
6434/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6435/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6436/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6437/// elements of dst.
6438///
6439/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
6440#[inline]
6441#[target_feature(enable = "avx512fp16")]
6442#[cfg_attr(test, assert_instr(vfnmadd))]
6443#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6444pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6445 unsafe {
6446 let mut fnmadd: f16 = 0.0;
6447 if k & 1 != 0 {
6448 let extracta: f16 = simd_extract!(a, 0);
6449 let extractb: f16 = simd_extract!(b, 0);
6450 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = fmaf16(-extracta, extractb, extractc);
6452 }
6453 simd_insert!(a, 0, fnmadd)
6454 }
6455}
6456
6457/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6458/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6459/// elements from a to the upper elements of dst.
6460///
6461/// Rounding is done according to the rounding parameter, which can be one of:
6462///
6463/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6464/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6465/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6466/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6467/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6468///
6469/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
6470#[inline]
6471#[target_feature(enable = "avx512fp16")]
6472#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6473#[rustc_legacy_const_generics(3)]
6474#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6475pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6476 unsafe {
6477 static_assert_rounding!(ROUNDING);
6478 let extracta: f16 = simd_extract!(a, 0);
6479 let extractb: f16 = simd_extract!(b, 0);
6480 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6482 simd_insert!(a, 0, r)
6483 }
6484}
6485
6486/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6487/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6488/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6489/// elements of dst.
6490///
6491/// Rounding is done according to the rounding parameter, which can be one of:
6492///
6493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6498///
6499/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
6500#[inline]
6501#[target_feature(enable = "avx512fp16")]
6502#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6503#[rustc_legacy_const_generics(4)]
6504#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6505pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
6506 a: __m128h,
6507 k: __mmask8,
6508 b: __m128h,
6509 c: __m128h,
6510) -> __m128h {
6511 unsafe {
6512 static_assert_rounding!(ROUNDING);
6513 let mut fnmadd: f16 = simd_extract!(a, 0);
6514 if k & 1 != 0 {
6515 let extractb: f16 = simd_extract!(b, 0);
6516 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
6518 }
6519 simd_insert!(a, 0, fnmadd)
6520 }
6521}
6522
6523/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6524/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6525/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6526/// elements of dst.
6527///
6528/// Rounding is done according to the rounding parameter, which can be one of:
6529///
6530/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6531/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6532/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6533/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6534/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6535///
6536/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
6537#[inline]
6538#[target_feature(enable = "avx512fp16")]
6539#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6540#[rustc_legacy_const_generics(4)]
6541#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6542pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
6543 a: __m128h,
6544 b: __m128h,
6545 c: __m128h,
6546 k: __mmask8,
6547) -> __m128h {
6548 unsafe {
6549 static_assert_rounding!(ROUNDING);
6550 let mut fnmadd: f16 = simd_extract!(c, 0);
6551 if k & 1 != 0 {
6552 let extracta: f16 = simd_extract!(a, 0);
6553 let extractb: f16 = simd_extract!(b, 0);
            fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
6555 }
6556 simd_insert!(c, 0, fnmadd)
6557 }
6558}
6559
6560/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6561/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6562/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6563/// elements of dst.
6564///
6565/// Rounding is done according to the rounding parameter, which can be one of:
6566///
6567/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6568/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6569/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6570/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6571/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6572///
6573/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
6574#[inline]
6575#[target_feature(enable = "avx512fp16")]
6576#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6577#[rustc_legacy_const_generics(4)]
6578#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6579pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
6580 k: __mmask8,
6581 a: __m128h,
6582 b: __m128h,
6583 c: __m128h,
6584) -> __m128h {
6585 unsafe {
6586 static_assert_rounding!(ROUNDING);
6587 let mut fnmadd: f16 = 0.0;
6588 if k & 1 != 0 {
6589 let extracta: f16 = simd_extract!(a, 0);
6590 let extractb: f16 = simd_extract!(b, 0);
6591 let extractc: f16 = simd_extract!(c, 0);
            fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
6593 }
6594 simd_insert!(a, 0, fnmadd)
6595 }
6596}
6597
6598/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6599/// in c from the negated intermediate result, and store the results in dst.
6600///
6601/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
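///
/// Illustrative sketch of the per-element operation (hypothetical values; assumes `avx512fp16`
/// and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // Every element becomes -(2.0 * 3.0) - 1.0 = -7.0
/// let r = _mm_fnmsub_ph(a, b, c);
/// ```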
6602#[inline]
6603#[target_feature(enable = "avx512fp16,avx512vl")]
6604#[cfg_attr(test, assert_instr(vfnmsub))]
6605#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6606pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6608}
6609
6610/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6611/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6612/// copied from a when the corresponding mask bit is not set).
6613///
6614/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
6615#[inline]
6616#[target_feature(enable = "avx512fp16,avx512vl")]
6617#[cfg_attr(test, assert_instr(vfnmsub))]
6618#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6619pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
6621}
6622
6623/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6624/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6625/// copied from c when the corresponding mask bit is not set).
6626///
6627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
6628#[inline]
6629#[target_feature(enable = "avx512fp16,avx512vl")]
6630#[cfg_attr(test, assert_instr(vfnmsub))]
6631#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6632pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
6634}
6635
6636/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6637/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6638/// zeroed out when the corresponding mask bit is not set).
6639///
6640/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
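///
/// Illustrative sketch of the zeromask behaviour (hypothetical values; assumes `avx512fp16`
/// and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // Elements 0 and 1 become -(2.0 * 3.0) - 1.0 = -7.0; elements 2..=7 are zeroed
/// let r = _mm_maskz_fnmsub_ph(0b0000_0011, a, b, c);
/// ```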
6641#[inline]
6642#[target_feature(enable = "avx512fp16,avx512vl")]
6643#[cfg_attr(test, assert_instr(vfnmsub))]
6644#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6645pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
6647}
6648
6649/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6650/// in c from the negated intermediate result, and store the results in dst.
6651///
6652/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
6653#[inline]
6654#[target_feature(enable = "avx512fp16,avx512vl")]
6655#[cfg_attr(test, assert_instr(vfnmsub))]
6656#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6657pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6659}
6660
6661/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6662/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6663/// copied from a when the corresponding mask bit is not set).
6664///
6665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
6666#[inline]
6667#[target_feature(enable = "avx512fp16,avx512vl")]
6668#[cfg_attr(test, assert_instr(vfnmsub))]
6669#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6670pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
6672}
6673
6674/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6675/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6676/// copied from c when the corresponding mask bit is not set).
6677///
6678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
6679#[inline]
6680#[target_feature(enable = "avx512fp16,avx512vl")]
6681#[cfg_attr(test, assert_instr(vfnmsub))]
6682#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6683pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
6685}
6686
6687/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6688/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6689/// zeroed out when the corresponding mask bit is not set).
6690///
6691/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
6692#[inline]
6693#[target_feature(enable = "avx512fp16,avx512vl")]
6694#[cfg_attr(test, assert_instr(vfnmsub))]
6695#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6696pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
6698}
6699
6700/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6701/// in c from the negated intermediate result, and store the results in dst.
6702///
6703/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
6704#[inline]
6705#[target_feature(enable = "avx512fp16")]
6706#[cfg_attr(test, assert_instr(vfnmsub))]
6707#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6708pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
6710}
6711
6712/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6713/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6714/// copied from a when the corresponding mask bit is not set).
6715///
6716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
6717#[inline]
6718#[target_feature(enable = "avx512fp16")]
6719#[cfg_attr(test, assert_instr(vfnmsub))]
6720#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6721pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
6723}
6724
6725/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6726/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6727/// copied from c when the corresponding mask bit is not set).
6728///
6729/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
6730#[inline]
6731#[target_feature(enable = "avx512fp16")]
6732#[cfg_attr(test, assert_instr(vfnmsub))]
6733#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6734pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
6736}
6737
6738/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6739/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6740/// zeroed out when the corresponding mask bit is not set).
6741///
6742/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
6743#[inline]
6744#[target_feature(enable = "avx512fp16")]
6745#[cfg_attr(test, assert_instr(vfnmsub))]
6746#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6747pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
6749}
6750
6751/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6752/// in c from the negated intermediate result, and store the results in dst.
6753///
6754/// Rounding is done according to the rounding parameter, which can be one of:
6755///
6756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6761///
6762/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
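///
/// Illustrative sketch (hypothetical values; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm512_set1_ph(2.0);
/// let b = _mm512_set1_ph(3.0);
/// let c = _mm512_set1_ph(1.0);
/// // Every element becomes -(2.0 * 3.0) - 1.0 = -7.0 under round-to-nearest
/// let r = _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```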
6763#[inline]
6764#[target_feature(enable = "avx512fp16")]
6765#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6766#[rustc_legacy_const_generics(3)]
6767#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6768pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6769 unsafe {
6770 static_assert_rounding!(ROUNDING);
        vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
6772 }
6773}
6774
6775/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6776/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6777/// copied from a when the corresponding mask bit is not set).
6778///
6779/// Rounding is done according to the rounding parameter, which can be one of:
6780///
6781/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6782/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6783/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6784/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6785/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6786///
6787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
6788#[inline]
6789#[target_feature(enable = "avx512fp16")]
6790#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6791#[rustc_legacy_const_generics(4)]
6792#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6793pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
6794 a: __m512h,
6795 k: __mmask32,
6796 b: __m512h,
6797 c: __m512h,
6798) -> __m512h {
6799 unsafe {
6800 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
6802 }
6803}
6804
6805/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6806/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6807/// copied from c when the corresponding mask bit is not set).
6808///
6809/// Rounding is done according to the rounding parameter, which can be one of:
6810///
6811/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6812/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6813/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6814/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6815/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6816///
6817/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
6818#[inline]
6819#[target_feature(enable = "avx512fp16")]
6820#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6821#[rustc_legacy_const_generics(4)]
6822#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6823pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
6824 a: __m512h,
6825 b: __m512h,
6826 c: __m512h,
6827 k: __mmask32,
6828) -> __m512h {
6829 unsafe {
6830 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
6832 }
6833}
6834
6835/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6836/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6837/// zeroed out when the corresponding mask bit is not set).
6838///
6839/// Rounding is done according to the rounding parameter, which can be one of:
6840///
6841/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6842/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6843/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6844/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6845/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6846///
6847/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
6848#[inline]
6849#[target_feature(enable = "avx512fp16")]
6850#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6851#[rustc_legacy_const_generics(4)]
6852#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6853pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
6854 k: __mmask32,
6855 a: __m512h,
6856 b: __m512h,
6857 c: __m512h,
6858) -> __m512h {
6859 unsafe {
6860 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
6866 }
6867}
6868
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7 packed
6871/// elements from a to the upper elements of dst.
6872///
6873/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
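///
/// Illustrative sketch (hypothetical values; assumes `avx512fp16` support):
///
/// ```ignore
/// let a = _mm_set_sh(2.0);
/// let b = _mm_set_sh(3.0);
/// let c = _mm_set_sh(10.0);
/// // Lower lane = -(2.0 * 3.0) - 10.0 = -16.0; upper lanes are copied from `a`
/// let r = _mm_fnmsub_sh(a, b, c);
/// ```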
6874#[inline]
6875#[target_feature(enable = "avx512fp16")]
6876#[cfg_attr(test, assert_instr(vfnmsub))]
6877#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6878pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6879 unsafe {
6880 let extracta: f16 = simd_extract!(a, 0);
6881 let extractb: f16 = simd_extract!(b, 0);
6882 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(-extracta, extractb, -extractc);
6884 simd_insert!(a, 0, r)
6885 }
6886}
6887
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
6890/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6891/// elements of dst.
6892///
6893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
6894#[inline]
6895#[target_feature(enable = "avx512fp16")]
6896#[cfg_attr(test, assert_instr(vfnmsub))]
6897#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6898pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6899 unsafe {
6900 let mut fnmsub: f16 = simd_extract!(a, 0);
6901 if k & 1 != 0 {
6902 let extractb: f16 = simd_extract!(b, 0);
6903 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = fmaf16(-fnmsub, extractb, -extractc);
6905 }
6906 simd_insert!(a, 0, fnmsub)
6907 }
6908}
6909
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
6912/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6913/// elements of dst.
6914///
6915/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
6916#[inline]
6917#[target_feature(enable = "avx512fp16")]
6918#[cfg_attr(test, assert_instr(vfnmsub))]
6919#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6920pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6921 unsafe {
6922 let mut fnmsub: f16 = simd_extract!(c, 0);
6923 if k & 1 != 0 {
6924 let extracta: f16 = simd_extract!(a, 0);
6925 let extractb: f16 = simd_extract!(b, 0);
            fnmsub = fmaf16(-extracta, extractb, -fnmsub);
6927 }
6928 simd_insert!(c, 0, fnmsub)
6929 }
6930}
6931
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element
6934/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6935/// elements of dst.
6936///
6937/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
6938#[inline]
6939#[target_feature(enable = "avx512fp16")]
6940#[cfg_attr(test, assert_instr(vfnmsub))]
6941#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6942pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6943 unsafe {
6944 let mut fnmsub: f16 = 0.0;
6945 if k & 1 != 0 {
6946 let extracta: f16 = simd_extract!(a, 0);
6947 let extractb: f16 = simd_extract!(b, 0);
6948 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = fmaf16(-extracta, extractb, -extractc);
6950 }
6951 simd_insert!(a, 0, fnmsub)
6952 }
6953}
6954
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 7 packed
6957/// elements from a to the upper elements of dst.
6958///
6959/// Rounding is done according to the rounding parameter, which can be one of:
6960///
6961/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6962/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6963/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6964/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6965/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6966///
6967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
6968#[inline]
6969#[target_feature(enable = "avx512fp16")]
6970#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6971#[rustc_legacy_const_generics(3)]
6972#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6973pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6974 unsafe {
6975 static_assert_rounding!(ROUNDING);
6976 let extracta: f16 = simd_extract!(a, 0);
6977 let extractb: f16 = simd_extract!(b, 0);
6978 let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
6980 simd_insert!(a, 0, r)
6981 }
6982}
6983
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
6986/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6987/// elements of dst.
6988///
6989/// Rounding is done according to the rounding parameter, which can be one of:
6990///
6991/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6992/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6993/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6994/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6995/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6996///
6997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
6998#[inline]
6999#[target_feature(enable = "avx512fp16")]
7000#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7001#[rustc_legacy_const_generics(4)]
7002#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7003pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
7004 a: __m128h,
7005 k: __mmask8,
7006 b: __m128h,
7007 c: __m128h,
7008) -> __m128h {
7009 unsafe {
7010 static_assert_rounding!(ROUNDING);
7011 let mut fnmsub: f16 = simd_extract!(a, 0);
7012 if k & 1 != 0 {
7013 let extractb: f16 = simd_extract!(b, 0);
7014 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
7016 }
7017 simd_insert!(a, 0, fnmsub)
7018 }
7019}
7020
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element
7023/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
7024/// elements of dst.
7025///
7026/// Rounding is done according to the rounding parameter, which can be one of:
7027///
7028/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7029/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7030/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7031/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7032/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7033///
7034/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
7035#[inline]
7036#[target_feature(enable = "avx512fp16")]
7037#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7038#[rustc_legacy_const_generics(4)]
7039#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7040pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
7041 a: __m128h,
7042 b: __m128h,
7043 c: __m128h,
7044 k: __mmask8,
7045) -> __m128h {
7046 unsafe {
7047 static_assert_rounding!(ROUNDING);
7048 let mut fnmsub: f16 = simd_extract!(c, 0);
7049 if k & 1 != 0 {
7050 let extracta: f16 = simd_extract!(a, 0);
7051 let extractb: f16 = simd_extract!(b, 0);
            fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
7053 }
7054 simd_insert!(c, 0, fnmsub)
7055 }
7056}
7057
/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower element
/// in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element
7060/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7061/// elements of dst.
7062///
7063/// Rounding is done according to the rounding parameter, which can be one of:
7064///
7065/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7066/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7067/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7068/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7069/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7070///
7071/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
7072#[inline]
7073#[target_feature(enable = "avx512fp16")]
7074#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7075#[rustc_legacy_const_generics(4)]
7076#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7077pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
7078 k: __mmask8,
7079 a: __m128h,
7080 b: __m128h,
7081 c: __m128h,
7082) -> __m128h {
7083 unsafe {
7084 static_assert_rounding!(ROUNDING);
7085 let mut fnmsub: f16 = 0.0;
7086 if k & 1 != 0 {
7087 let extracta: f16 = simd_extract!(a, 0);
7088 let extractb: f16 = simd_extract!(b, 0);
7089 let extractc: f16 = simd_extract!(c, 0);
            fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
7091 }
7092 simd_insert!(a, 0, fnmsub)
7093 }
7094}
7095
7096/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7097/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7098///
7099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
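///
/// Illustrative sketch of the alternating pattern (hypothetical values; assumes `avx512fp16`
/// and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // Even-indexed elements: 2.0 * 3.0 - 1.0 = 5.0
/// // Odd-indexed elements:  2.0 * 3.0 + 1.0 = 7.0
/// let r = _mm_fmaddsub_ph(a, b, c);
/// ```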
7100#[inline]
7101#[target_feature(enable = "avx512fp16,avx512vl")]
7102#[cfg_attr(test, assert_instr(vfmaddsub))]
7103#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7104pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7105 unsafe { vfmaddsubph_128(a, b, c) }
7106}
7107
7108/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7109/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7110/// (the element is copied from a when the corresponding mask bit is not set).
7111///
7112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
7113#[inline]
7114#[target_feature(enable = "avx512fp16,avx512vl")]
7115#[cfg_attr(test, assert_instr(vfmaddsub))]
7116#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7117pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
7119}
7120
7121/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7122/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7123/// (the element is copied from c when the corresponding mask bit is not set).
7124///
7125/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
7126#[inline]
7127#[target_feature(enable = "avx512fp16,avx512vl")]
7128#[cfg_attr(test, assert_instr(vfmaddsub))]
7129#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7130pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
7132}
7133
7134/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7135/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7136/// (the element is zeroed out when the corresponding mask bit is not set).
7137///
7138/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
7139#[inline]
7140#[target_feature(enable = "avx512fp16,avx512vl")]
7141#[cfg_attr(test, assert_instr(vfmaddsub))]
7142#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7143pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
7145}
7146
7147/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7148/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7149///
7150/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
7151#[inline]
7152#[target_feature(enable = "avx512fp16,avx512vl")]
7153#[cfg_attr(test, assert_instr(vfmaddsub))]
7154#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7155pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7156 unsafe { vfmaddsubph_256(a, b, c) }
7157}
7158
7159/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7160/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7161/// (the element is copied from a when the corresponding mask bit is not set).
7162///
7163/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
7164#[inline]
7165#[target_feature(enable = "avx512fp16,avx512vl")]
7166#[cfg_attr(test, assert_instr(vfmaddsub))]
7167#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7168pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
7170}
7171
7172/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7173/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7174/// (the element is copied from c when the corresponding mask bit is not set).
7175///
7176/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
7177#[inline]
7178#[target_feature(enable = "avx512fp16,avx512vl")]
7179#[cfg_attr(test, assert_instr(vfmaddsub))]
7180#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7181pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
7183}
7184
7185/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7186/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7187/// (the element is zeroed out when the corresponding mask bit is not set).
7188///
7189/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
7190#[inline]
7191#[target_feature(enable = "avx512fp16,avx512vl")]
7192#[cfg_attr(test, assert_instr(vfmaddsub))]
7193#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7194pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
7196}
7197
7198/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7199/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7200///
7201/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
7202#[inline]
7203#[target_feature(enable = "avx512fp16")]
7204#[cfg_attr(test, assert_instr(vfmaddsub))]
7205#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7206pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7207 _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
7208}
7209
7210/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7211/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7212/// (the element is copied from a when the corresponding mask bit is not set).
7213///
7214/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
7215#[inline]
7216#[target_feature(enable = "avx512fp16")]
7217#[cfg_attr(test, assert_instr(vfmaddsub))]
7218#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7219pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
7221}
7222
7223/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7224/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7225/// (the element is copied from c when the corresponding mask bit is not set).
7226///
7227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
7228#[inline]
7229#[target_feature(enable = "avx512fp16")]
7230#[cfg_attr(test, assert_instr(vfmaddsub))]
7231#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7232pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
7234}
7235
7236/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7237/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7238/// (the element is zeroed out when the corresponding mask bit is not set).
7239///
7240/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
7241#[inline]
7242#[target_feature(enable = "avx512fp16")]
7243#[cfg_attr(test, assert_instr(vfmaddsub))]
7244#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7245pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
7247}
7248
7249/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7250/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7251///
7252/// Rounding is done according to the rounding parameter, which can be one of:
7253///
7254/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7255/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7256/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7257/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7258/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7259///
7260/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
7261#[inline]
7262#[target_feature(enable = "avx512fp16")]
7263#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7264#[rustc_legacy_const_generics(3)]
7265#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7266pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
7267 a: __m512h,
7268 b: __m512h,
7269 c: __m512h,
7270) -> __m512h {
7271 unsafe {
7272 static_assert_rounding!(ROUNDING);
7273 vfmaddsubph_512(a, b, c, ROUNDING)
7274 }
7275}
7276
7277/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7278/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7279/// (the element is copied from a when the corresponding mask bit is not set).
7280///
7281/// Rounding is done according to the rounding parameter, which can be one of:
7282///
7283/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7284/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7285/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7286/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7288///
7289/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
7290#[inline]
7291#[target_feature(enable = "avx512fp16")]
7292#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7293#[rustc_legacy_const_generics(4)]
7294#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7295pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
7296 a: __m512h,
7297 k: __mmask32,
7298 b: __m512h,
7299 c: __m512h,
7300) -> __m512h {
7301 unsafe {
7302 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
7304 }
7305}
7306
7307/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7308/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7309/// (the element is copied from c when the corresponding mask bit is not set).
7310///
7311/// Rounding is done according to the rounding parameter, which can be one of:
7312///
7313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7318///
7319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
7320#[inline]
7321#[target_feature(enable = "avx512fp16")]
7322#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7323#[rustc_legacy_const_generics(4)]
7324#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7325pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
7326 a: __m512h,
7327 b: __m512h,
7328 c: __m512h,
7329 k: __mmask32,
7330) -> __m512h {
7331 unsafe {
7332 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
7334 }
7335}
7336
7337/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7338/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7339/// (the element is zeroed out when the corresponding mask bit is not set).
7340///
7341/// Rounding is done according to the rounding parameter, which can be one of:
7342///
7343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7348///
7349/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
7350#[inline]
7351#[target_feature(enable = "avx512fp16")]
7352#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7353#[rustc_legacy_const_generics(4)]
7354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7355pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
7356 k: __mmask32,
7357 a: __m512h,
7358 b: __m512h,
7359 c: __m512h,
7360) -> __m512h {
7361 unsafe {
7362 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
7368 }
7369}
7370
7371/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7372/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7373///
7374/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
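///
/// Illustrative sketch of the alternating pattern (hypothetical values; assumes `avx512fp16`
/// and `avx512vl` support):
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set1_ph(3.0);
/// let c = _mm_set1_ph(1.0);
/// // Even-indexed elements: 2.0 * 3.0 + 1.0 = 7.0
/// // Odd-indexed elements:  2.0 * 3.0 - 1.0 = 5.0
/// let r = _mm_fmsubadd_ph(a, b, c);
/// ```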
7375#[inline]
7376#[target_feature(enable = "avx512fp16,avx512vl")]
7377#[cfg_attr(test, assert_instr(vfmsubadd))]
7378#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7379pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { vfmaddsubph_128(a, b, simd_neg(c)) }
7381}
7382
7383/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7384/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7385/// (the element is copied from a when the corresponding mask bit is not set).
7386///
7387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
7388#[inline]
7389#[target_feature(enable = "avx512fp16,avx512vl")]
7390#[cfg_attr(test, assert_instr(vfmsubadd))]
7391#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7392pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
7394}
7395
7396/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7397/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7398/// (the element is copied from c when the corresponding mask bit is not set).
7399///
7400/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
7401#[inline]
7402#[target_feature(enable = "avx512fp16,avx512vl")]
7403#[cfg_attr(test, assert_instr(vfmsubadd))]
7404#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7405pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
7407}
7408
7409/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7410/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7411/// (the element is zeroed out when the corresponding mask bit is not set).
7412///
7413/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
7414#[inline]
7415#[target_feature(enable = "avx512fp16,avx512vl")]
7416#[cfg_attr(test, assert_instr(vfmsubadd))]
7417#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7418pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
7420}
7421
7422/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7423/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7424///
7425/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
7426#[inline]
7427#[target_feature(enable = "avx512fp16,avx512vl")]
7428#[cfg_attr(test, assert_instr(vfmsubadd))]
7429#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7430pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { vfmaddsubph_256(a, b, simd_neg(c)) }
7432}
7433
7434/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7435/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7436/// (the element is copied from a when the corresponding mask bit is not set).
7437///
7438/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
7439#[inline]
7440#[target_feature(enable = "avx512fp16,avx512vl")]
7441#[cfg_attr(test, assert_instr(vfmsubadd))]
7442#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7443pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
7445}
7446
7447/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7448/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7449/// (the element is copied from c when the corresponding mask bit is not set).
7450///
7451/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
7452#[inline]
7453#[target_feature(enable = "avx512fp16,avx512vl")]
7454#[cfg_attr(test, assert_instr(vfmsubadd))]
7455#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7456pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
7458}
7459
7460/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7461/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7462/// (the element is zeroed out when the corresponding mask bit is not set).
7463///
7464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
7465#[inline]
7466#[target_feature(enable = "avx512fp16,avx512vl")]
7467#[cfg_attr(test, assert_instr(vfmsubadd))]
7468#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7469pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
7471}
7472
7473/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7474/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7475///
7476/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
7477#[inline]
7478#[target_feature(enable = "avx512fp16")]
7479#[cfg_attr(test, assert_instr(vfmsubadd))]
7480#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7481pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7482 _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
7483}
7484
7485/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7486/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7487/// (the element is copied from a when the corresponding mask bit is not set).
7488///
7489/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
7490#[inline]
7491#[target_feature(enable = "avx512fp16")]
7492#[cfg_attr(test, assert_instr(vfmsubadd))]
7493#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7494pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
7496}
7497
7498/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7499/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7500/// (the element is copied from c when the corresponding mask bit is not set).
7501///
7502/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
7503#[inline]
7504#[target_feature(enable = "avx512fp16")]
7505#[cfg_attr(test, assert_instr(vfmsubadd))]
7506#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7507pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
7509}
7510
7511/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7512/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7513/// (the element is zeroed out when the corresponding mask bit is not set).
7514///
7515/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
7516#[inline]
7517#[target_feature(enable = "avx512fp16")]
7518#[cfg_attr(test, assert_instr(vfmsubadd))]
7519#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7520pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
7522}
7523
7524/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7525/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7526///
7527/// Rounding is done according to the rounding parameter, which can be one of:
7528///
7529/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7530/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7531/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7532/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7533/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7534///
7535/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
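///
/// # Examples
///
/// A sketch of selecting an explicit rounding mode through the const generic (illustrative only,
/// not a doctest; assumes nightly `f16`/`stdarch_x86_avx512_f16` features and AVX512-FP16
/// hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
///     // Round toward zero and suppress exceptions for this operation only,
///     // regardless of the current MXCSR.RC setting.
///     _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```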
7536#[inline]
7537#[target_feature(enable = "avx512fp16")]
7538#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7539#[rustc_legacy_const_generics(3)]
7540#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7541pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
7542 a: __m512h,
7543 b: __m512h,
7544 c: __m512h,
7545) -> __m512h {
7546 unsafe {
7547 static_assert_rounding!(ROUNDING);
        vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
7549 }
7550}
7551
7552/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k
/// (elements are copied from a when the corresponding mask bit is not set).
7555///
7556/// Rounding is done according to the rounding parameter, which can be one of:
7557///
7558/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7559/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7560/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7561/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7562/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7563///
7564/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
7565#[inline]
7566#[target_feature(enable = "avx512fp16")]
7567#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7568#[rustc_legacy_const_generics(4)]
7569#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7570pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
7571 a: __m512h,
7572 k: __mmask32,
7573 b: __m512h,
7574 c: __m512h,
7575) -> __m512h {
7576 unsafe {
7577 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
7579 }
7580}
7581
7582/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k
/// (elements are copied from c when the corresponding mask bit is not set).
7585///
7586/// Rounding is done according to the rounding parameter, which can be one of:
7587///
7588/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7589/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7590/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7591/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7592/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7593///
7594/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
7595#[inline]
7596#[target_feature(enable = "avx512fp16")]
7597#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7598#[rustc_legacy_const_generics(4)]
7599#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7600pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
7601 a: __m512h,
7602 b: __m512h,
7603 c: __m512h,
7604 k: __mmask32,
7605) -> __m512h {
7606 unsafe {
7607 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
7609 }
7610}
7611
7612/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
7615///
7616/// Rounding is done according to the rounding parameter, which can be one of:
7617///
7618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7623///
7624/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
7625#[inline]
7626#[target_feature(enable = "avx512fp16")]
7627#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7628#[rustc_legacy_const_generics(4)]
7629#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7630pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
7631 k: __mmask32,
7632 a: __m512h,
7633 b: __m512h,
7634 c: __m512h,
7635) -> __m512h {
7636 unsafe {
7637 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
7643 }
7644}
7645
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7647/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7648///
7649/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
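///
/// # Examples
///
/// A minimal sketch (illustrative only, not a doctest; assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and a CPU with AVX512-FP16 and AVX512VL):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(4.0);
///     // Every lane holds an approximation of 1/4 = 0.25, accurate to about 1.5*2^-12.
///     _mm_rcp_ph(a)
/// }
/// ```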
7650#[inline]
7651#[target_feature(enable = "avx512fp16,avx512vl")]
7652#[cfg_attr(test, assert_instr(vrcpph))]
7653#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7654pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
    _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
7656}
7657
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
7660/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7661///
7662/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
7663#[inline]
7664#[target_feature(enable = "avx512fp16,avx512vl")]
7665#[cfg_attr(test, assert_instr(vrcpph))]
7666#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7667pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7668 unsafe { vrcpph_128(a, src, k) }
7669}
7670
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst
/// using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7673/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7674///
7675/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
7676#[inline]
7677#[target_feature(enable = "avx512fp16,avx512vl")]
7678#[cfg_attr(test, assert_instr(vrcpph))]
7679#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7680pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
7682}
7683
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7685/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7686///
7687/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
7688#[inline]
7689#[target_feature(enable = "avx512fp16,avx512vl")]
7690#[cfg_attr(test, assert_instr(vrcpph))]
7691#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7692pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
    _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
7694}
7695
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
7698/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7699///
7700/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
7701#[inline]
7702#[target_feature(enable = "avx512fp16,avx512vl")]
7703#[cfg_attr(test, assert_instr(vrcpph))]
7704#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7705pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
7706 unsafe { vrcpph_256(a, src, k) }
7707}
7708
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst
/// using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7711/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7712///
7713/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
7714#[inline]
7715#[target_feature(enable = "avx512fp16,avx512vl")]
7716#[cfg_attr(test, assert_instr(vrcpph))]
7717#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7718pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
7720}
7721
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst.
7723/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7724///
7725/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
7726#[inline]
7727#[target_feature(enable = "avx512fp16")]
7728#[cfg_attr(test, assert_instr(vrcpph))]
7729#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7730pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
    _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
7732}
7733
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
7736/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7737///
7738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
7739#[inline]
7740#[target_feature(enable = "avx512fp16")]
7741#[cfg_attr(test, assert_instr(vrcpph))]
7742#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7743pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
7744 unsafe { vrcpph_512(a, src, k) }
7745}
7746
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in a, and store the results in dst
/// using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7749/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7750///
7751/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
7752#[inline]
7753#[target_feature(enable = "avx512fp16")]
7754#[cfg_attr(test, assert_instr(vrcpph))]
7755#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7756pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
7758}
7759
7760/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7761/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
7762/// upper elements of dst.
7763/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7764///
7765/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
7766#[inline]
7767#[target_feature(enable = "avx512fp16")]
7768#[cfg_attr(test, assert_instr(vrcpsh))]
7769#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7770pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rcp_sh(_mm_undefined_ph(), 0xff, a, b)
7772}
7773
7774/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7775/// store the result in the lower element of dst using writemask k (the element is copied from src when
7776/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7777/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7778///
7779/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
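///
/// # Examples
///
/// A sketch of the merge-masked scalar form (illustrative only, not a doctest; assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and AVX512-FP16 hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(src: __m128h, a: __m128h, b: __m128h) -> __m128h {
///     // Mask bit 0 is clear, so the lower lane is copied from `src`;
///     // the upper 7 lanes always come from `a`.
///     _mm_mask_rcp_sh(src, 0b0, a, b)
/// }
/// ```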
7780#[inline]
7781#[target_feature(enable = "avx512fp16")]
7782#[cfg_attr(test, assert_instr(vrcpsh))]
7783#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7784pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7785 unsafe { vrcpsh(a, b, src, k) }
7786}
7787
7788/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7789/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
7790/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7791/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7792///
7793/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
7794#[inline]
7795#[target_feature(enable = "avx512fp16")]
7796#[cfg_attr(test, assert_instr(vrcpsh))]
7797#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7798pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rcp_sh(_mm_setzero_ph(), k, a, b)
7800}
7801
7802/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7803/// elements in a, and store the results in dst.
7804/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7805///
7806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
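///
/// # Examples
///
/// A minimal sketch (illustrative only, not a doctest; assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and a CPU with AVX512-FP16 and AVX512VL):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(16.0);
///     // Every lane approximates 1/sqrt(16) = 0.25.
///     _mm_rsqrt_ph(a)
/// }
/// ```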
7807#[inline]
7808#[target_feature(enable = "avx512fp16,avx512vl")]
7809#[cfg_attr(test, assert_instr(vrsqrtph))]
7810#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7811pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
    _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
7813}
7814
7815/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7816/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7817/// the corresponding mask bit is not set).
7818/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7819///
7820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
7821#[inline]
7822#[target_feature(enable = "avx512fp16,avx512vl")]
7823#[cfg_attr(test, assert_instr(vrsqrtph))]
7824#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7825pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7826 unsafe { vrsqrtph_128(a, src, k) }
7827}
7828
7829/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7830/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7831/// corresponding mask bit is not set).
7832/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7833///
7834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
7835#[inline]
7836#[target_feature(enable = "avx512fp16,avx512vl")]
7837#[cfg_attr(test, assert_instr(vrsqrtph))]
7838#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7839pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
7841}
7842
7843/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7844/// elements in a, and store the results in dst.
7845/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7846///
7847/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
7848#[inline]
7849#[target_feature(enable = "avx512fp16,avx512vl")]
7850#[cfg_attr(test, assert_instr(vrsqrtph))]
7851#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7852pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
    _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a)
7854}
7855
7856/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7857/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7858/// the corresponding mask bit is not set).
7859/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7860///
7861/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
7862#[inline]
7863#[target_feature(enable = "avx512fp16,avx512vl")]
7864#[cfg_attr(test, assert_instr(vrsqrtph))]
7865#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7866pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
7867 unsafe { vrsqrtph_256(a, src, k) }
7868}
7869
7870/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7871/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7872/// corresponding mask bit is not set).
7873/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7874///
7875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
7876#[inline]
7877#[target_feature(enable = "avx512fp16,avx512vl")]
7878#[cfg_attr(test, assert_instr(vrsqrtph))]
7879#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7880pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a)
7882}
7883
7884/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7885/// elements in a, and store the results in dst.
7886/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7887///
7888/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
7889#[inline]
7890#[target_feature(enable = "avx512fp16")]
7891#[cfg_attr(test, assert_instr(vrsqrtph))]
7892#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7893pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
    _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a)
7895}
7896
7897/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7898/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7899/// the corresponding mask bit is not set).
7900/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7901///
7902/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
7903#[inline]
7904#[target_feature(enable = "avx512fp16")]
7905#[cfg_attr(test, assert_instr(vrsqrtph))]
7906#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7907pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
7908 unsafe { vrsqrtph_512(a, src, k) }
7909}
7910
7911/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7912/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7913/// corresponding mask bit is not set).
7914/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7915///
7916/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
7917#[inline]
7918#[target_feature(enable = "avx512fp16")]
7919#[cfg_attr(test, assert_instr(vrsqrtph))]
7920#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7921pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a)
7923}
7924
7925/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7926/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a
7927/// to the upper elements of dst.
7928/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7929///
7930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
7931#[inline]
7932#[target_feature(enable = "avx512fp16")]
7933#[cfg_attr(test, assert_instr(vrsqrtsh))]
7934#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7935pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rsqrt_sh(_mm_undefined_ph(), 0xff, a, b)
7937}
7938
7939/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7940/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src
7941/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7942/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7943///
7944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
7945#[inline]
7946#[target_feature(enable = "avx512fp16")]
7947#[cfg_attr(test, assert_instr(vrsqrtsh))]
7948#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7949pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7950 unsafe { vrsqrtsh(a, b, src, k) }
7951}
7952
7953/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7954/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when
7955/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7956/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7957///
7958/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
7959#[inline]
7960#[target_feature(enable = "avx512fp16")]
7961#[cfg_attr(test, assert_instr(vrsqrtsh))]
7962#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7963pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rsqrt_sh(_mm_setzero_ph(), k, a, b)
7965}
7966
7967/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7968/// results in dst.
7969///
7970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
7971#[inline]
7972#[target_feature(enable = "avx512fp16,avx512vl")]
7973#[cfg_attr(test, assert_instr(vsqrtph))]
7974#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7975pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
7976 unsafe { simd_fsqrt(a) }
7977}
7978
7979/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7980/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7981///
7982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
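///
/// # Examples
///
/// A sketch of merge masking (illustrative only, not a doctest; assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and a CPU with AVX512-FP16 and AVX512VL):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo(src: __m128h, a: __m128h) -> __m128h {
///     // Lanes 0 and 1 receive sqrt(a); lanes 2..=7 are copied from `src`.
///     _mm_mask_sqrt_ph(src, 0b0000_0011, a)
/// }
/// ```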
7983#[inline]
7984#[target_feature(enable = "avx512fp16,avx512vl")]
7985#[cfg_attr(test, assert_instr(vsqrtph))]
7986#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7987pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) }
7989}
7990
7991/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7992/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7993///
7994/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
7995#[inline]
7996#[target_feature(enable = "avx512fp16,avx512vl")]
7997#[cfg_attr(test, assert_instr(vsqrtph))]
7998#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7999pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) }
8001}
8002
8003/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8004/// results in dst.
8005///
8006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
8007#[inline]
8008#[target_feature(enable = "avx512fp16,avx512vl")]
8009#[cfg_attr(test, assert_instr(vsqrtph))]
8010#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8011pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
8012 unsafe { simd_fsqrt(a) }
8013}
8014
8015/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8016/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8017///
8018/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
8019#[inline]
8020#[target_feature(enable = "avx512fp16,avx512vl")]
8021#[cfg_attr(test, assert_instr(vsqrtph))]
8022#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8023pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) }
8025}
8026
8027/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8028/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8029///
8030/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
8031#[inline]
8032#[target_feature(enable = "avx512fp16,avx512vl")]
8033#[cfg_attr(test, assert_instr(vsqrtph))]
8034#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8035pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) }
8037}
8038
8039/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8040/// results in dst.
8041///
8042/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
8043#[inline]
8044#[target_feature(enable = "avx512fp16")]
8045#[cfg_attr(test, assert_instr(vsqrtph))]
8046#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8047pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
8048 unsafe { simd_fsqrt(a) }
8049}
8050
8051/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8052/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8053///
8054/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
8055#[inline]
8056#[target_feature(enable = "avx512fp16")]
8057#[cfg_attr(test, assert_instr(vsqrtph))]
8058#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8059pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) }
8061}
8062
8063/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8064/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8065///
8066/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
8067#[inline]
8068#[target_feature(enable = "avx512fp16")]
8069#[cfg_attr(test, assert_instr(vsqrtph))]
8070#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8071pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) }
8073}
8074
8075/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8076/// results in dst.
8077/// Rounding is done according to the rounding parameter, which can be one of:
8078///
8079/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8080/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8081/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8082/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8083/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8084///
8085/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
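///
/// # Examples
///
/// A sketch of choosing an explicit rounding mode (illustrative only, not a doctest; assumes
/// nightly `f16`/`stdarch_x86_avx512_f16` features and AVX512-FP16 hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m512h) -> __m512h {
///     // Round to nearest with floating-point exceptions suppressed.
///     _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```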
8086#[inline]
8087#[target_feature(enable = "avx512fp16")]
8088#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8089#[rustc_legacy_const_generics(1)]
8090#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8091pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
8092 unsafe {
8093 static_assert_rounding!(ROUNDING);
8094 vsqrtph_512(a, ROUNDING)
8095 }
8096}
8097
8098/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8099/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8100/// Rounding is done according to the rounding parameter, which can be one of:
8101///
8102/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8103/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8104/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8105/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8106/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8107///
8108/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
8109#[inline]
8110#[target_feature(enable = "avx512fp16")]
8111#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8112#[rustc_legacy_const_generics(3)]
8113#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8114pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
8115 src: __m512h,
8116 k: __mmask32,
8117 a: __m512h,
8118) -> __m512h {
8119 unsafe {
8120 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src)
8122 }
8123}
8124
8125/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8126/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8127/// Rounding is done according to the rounding parameter, which can be one of:
8128///
8129/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8130/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8131/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8132/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8134///
8135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
8136#[inline]
8137#[target_feature(enable = "avx512fp16")]
8138#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8139#[rustc_legacy_const_generics(2)]
8140#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8141pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
8142 unsafe {
8143 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph())
8145 }
8146}
8147
8148/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8149/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8150/// elements of dst.
8151///
8152/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
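///
/// # Examples
///
/// A minimal sketch of the scalar form (illustrative only, not a doctest; assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and AVX512-FP16 hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16")]
/// fn demo(a: __m128h, b: __m128h) -> __m128h {
///     // Lower lane: square root of b's lower lane; upper 7 lanes: copied from a.
///     _mm_sqrt_sh(a, b)
/// }
/// ```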
8153#[inline]
8154#[target_feature(enable = "avx512fp16")]
8155#[cfg_attr(test, assert_instr(vsqrtsh))]
8156#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8157pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_sh(_mm_undefined_ph(), 0xff, a, b)
8159}
8160
8161/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8162/// the result in the lower element of dst using writemask k (the element is copied from src when mask
8163/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8164///
8165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
8166#[inline]
8167#[target_feature(enable = "avx512fp16")]
8168#[cfg_attr(test, assert_instr(vsqrtsh))]
8169#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8170pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8171 _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8172}
8173
8174/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8175/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8176/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8177///
8178/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
8179#[inline]
8180#[target_feature(enable = "avx512fp16")]
8181#[cfg_attr(test, assert_instr(vsqrtsh))]
8182#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8183pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_sh(_mm_setzero_ph(), k, a, b)
8185}
8186
8187/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8188/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8189/// elements of dst.
8190/// Rounding is done according to the rounding parameter, which can be one of:
8191///
8192/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8193/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8194/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8195/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8196/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8197///
8198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
8199#[inline]
8200#[target_feature(enable = "avx512fp16")]
8201#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8202#[rustc_legacy_const_generics(2)]
8203#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8204pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
8205 static_assert_rounding!(ROUNDING);
    _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
8207}
8208
8209/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8210/// the result in the lower element of dst using writemask k (the element is copied from src when mask
8211/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8212/// Rounding is done according to the rounding parameter, which can be one of:
8213///
8214/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8215/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8216/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8217/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8219///
8220/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
8221#[inline]
8222#[target_feature(enable = "avx512fp16")]
8223#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8224#[rustc_legacy_const_generics(4)]
8225#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8226pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
8227 src: __m128h,
8228 k: __mmask8,
8229 a: __m128h,
8230 b: __m128h,
8231) -> __m128h {
8232 unsafe {
8233 static_assert_rounding!(ROUNDING);
8234 vsqrtsh(a, b, src, k, ROUNDING)
8235 }
8236}
8237
8238/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8239/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8240/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8241/// Rounding is done according to the rounding parameter, which can be one of:
8242///
8243/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8244/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8245/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8246/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8247/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8248///
8249/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
8250#[inline]
8251#[target_feature(enable = "avx512fp16")]
8252#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8253#[rustc_legacy_const_generics(3)]
8254#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8255pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
8256 k: __mmask8,
8257 a: __m128h,
8258 b: __m128h,
8259) -> __m128h {
8260 static_assert_rounding!(ROUNDING);
    _mm_mask_sqrt_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
8262}
8263
8264/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8265/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8266/// value when inputs are NaN or signed-zero values.
8267///
8268/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
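///
/// # Examples
///
/// A minimal sketch (illustrative only, not a doctest; assumes nightly
/// `f16`/`stdarch_x86_avx512_f16` features and a CPU with AVX512-FP16 and AVX512VL). As with the
/// underlying `vmaxph` instruction, when exactly one of the two compared values is NaN, the
/// value from the second operand (b) is generally returned, which differs from IEEE 754 maximum:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane of the result is 2.0.
///     _mm_max_ph(a, b)
/// }
/// ```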
8269#[inline]
8270#[target_feature(enable = "avx512fp16,avx512vl")]
8271#[cfg_attr(test, assert_instr(vmaxph))]
8272#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8273pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
8274 unsafe { vmaxph_128(a, b) }
8275}
8276
8277/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8278/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8279/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8280/// NaN or signed-zero values.
8281///
8282/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
8283#[inline]
8284#[target_feature(enable = "avx512fp16,avx512vl")]
8285#[cfg_attr(test, assert_instr(vmaxph))]
8286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8287pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) }
8289}
8290
8291/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8292/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8293/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8294/// NaN or signed-zero values.
8295///
8296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
8297#[inline]
8298#[target_feature(enable = "avx512fp16,avx512vl")]
8299#[cfg_attr(test, assert_instr(vmaxph))]
8300#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8301pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) }
8303}
8304
8305/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8306/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8307/// value when inputs are NaN or signed-zero values.
8308///
8309/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
8310#[inline]
8311#[target_feature(enable = "avx512fp16,avx512vl")]
8312#[cfg_attr(test, assert_instr(vmaxph))]
8313#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8314pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
8315 unsafe { vmaxph_256(a, b) }
8316}
8317
8318/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8319/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8320/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8321/// NaN or signed-zero values.
8322///
8323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
8324#[inline]
8325#[target_feature(enable = "avx512fp16,avx512vl")]
8326#[cfg_attr(test, assert_instr(vmaxph))]
8327#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8328pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) }
8330}
8331
8332/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8333/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8334/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8335/// NaN or signed-zero values.
8336///
8337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
8338#[inline]
8339#[target_feature(enable = "avx512fp16,avx512vl")]
8340#[cfg_attr(test, assert_instr(vmaxph))]
8341#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8342pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) }
8344}
8345
8346/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8347/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8348/// value when inputs are NaN or signed-zero values.
8349///
8350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
8351#[inline]
8352#[target_feature(enable = "avx512fp16")]
8353#[cfg_attr(test, assert_instr(vmaxph))]
8354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8355pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
8356 _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
8357}
8358
8359/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8360/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8361/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8362/// NaN or signed-zero values.
8363///
8364/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
8365#[inline]
8366#[target_feature(enable = "avx512fp16")]
8367#[cfg_attr(test, assert_instr(vmaxph))]
8368#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8369pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) }
8371}
8372
8373/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8374/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8375/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8376/// NaN or signed-zero values.
8377///
8378/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
8379#[inline]
8380#[target_feature(enable = "avx512fp16")]
8381#[cfg_attr(test, assert_instr(vmaxph))]
8382#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8383pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) }
8385}
8386
8387/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8388/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8389/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8390/// NaN or signed-zero values.
8391///
8392/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
8393#[inline]
8394#[target_feature(enable = "avx512fp16")]
8395#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8396#[rustc_legacy_const_generics(2)]
8397#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8398pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
8399 unsafe {
8400 static_assert_sae!(SAE);
8401 vmaxph_512(a, b, SAE)
8402 }
8403}
8404
8405/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8406/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8407/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8408/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8409///
8410/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
8411#[inline]
8412#[target_feature(enable = "avx512fp16")]
8413#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8414#[rustc_legacy_const_generics(4)]
8415#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8416pub fn _mm512_mask_max_round_ph<const SAE: i32>(
8417 src: __m512h,
8418 k: __mmask32,
8419 a: __m512h,
8420 b: __m512h,
8421) -> __m512h {
8422 unsafe {
8423 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src)
8425 }
8426}
8427
8428/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8429/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8430/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8431/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8432///
8433/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
8434#[inline]
8435#[target_feature(enable = "avx512fp16")]
8436#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8437#[rustc_legacy_const_generics(3)]
8438#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8439pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8440 unsafe {
8441 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph())
8443 }
8444}
8445
8446/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8447/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8448/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
8449/// when inputs are NaN or signed-zero values.
8450///
8451/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
8452#[inline]
8453#[target_feature(enable = "avx512fp16,avx512vl")]
8454#[cfg_attr(test, assert_instr(vmaxsh))]
8455#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8456pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b)
8458}
8459
8460/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8461/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
8462/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
8463/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8464///
8465/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
8466#[inline]
8467#[target_feature(enable = "avx512fp16,avx512vl")]
8468#[cfg_attr(test, assert_instr(vmaxsh))]
8469#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8470pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8471 _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8472}
8473
8474/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8475/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8476/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8477/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8478///
8479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
8480#[inline]
8481#[target_feature(enable = "avx512fp16,avx512vl")]
8482#[cfg_attr(test, assert_instr(vmaxsh))]
8483#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8484pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_sh(_mm_setzero_ph(), k, a, b)
8486}
8487
8488/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8489/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8490/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8491/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8492///
8493/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
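///
/// # Examples
///
/// A sketch of suppressing exceptions via the `SAE` parameter (illustrative only, not a doctest;
/// assumes nightly `f16`/`stdarch_x86_avx512_f16` features and suitable hardware):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo(a: __m128h, b: __m128h) -> __m128h {
///     // Compare the lower lanes without raising floating-point exceptions.
///     _mm_max_round_sh::<_MM_FROUND_NO_EXC>(a, b)
/// }
/// ```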
8494#[inline]
8495#[target_feature(enable = "avx512fp16,avx512vl")]
8496#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8497#[rustc_legacy_const_generics(2)]
8498#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8499pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8500 static_assert_sae!(SAE);
    _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
8502}
8503
8504/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8505/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
8506/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8507/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8508/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8509///
8510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
8511#[inline]
8512#[target_feature(enable = "avx512fp16,avx512vl")]
8513#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8514#[rustc_legacy_const_generics(4)]
8515#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8516pub fn _mm_mask_max_round_sh<const SAE: i32>(
8517 src: __m128h,
8518 k: __mmask8,
8519 a: __m128h,
8520 b: __m128h,
8521) -> __m128h {
8522 unsafe {
8523 static_assert_sae!(SAE);
8524 vmaxsh(a, b, src, k, SAE)
8525 }
8526}
8527
8528/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8529/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8530/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8531/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8532/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8533///
8534/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
8535#[inline]
8536#[target_feature(enable = "avx512fp16,avx512vl")]
8537#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8538#[rustc_legacy_const_generics(3)]
8539#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8540pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8541 static_assert_sae!(SAE);
    _mm_mask_max_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
8543}
8544
8545/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8546/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8547/// when inputs are NaN or signed-zero values.
8548///
8549/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
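///
/// # Examples
///
/// A minimal sketch (illustrative only, not a doctest; same assumptions and NaN/signed-zero
/// caveat as the `_mm_max_ph` example above):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane of the result is 1.0.
///     _mm_min_ph(a, b)
/// }
/// ```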
8550#[inline]
8551#[target_feature(enable = "avx512fp16,avx512vl")]
8552#[cfg_attr(test, assert_instr(vminph))]
8553#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8554pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
8555 unsafe { vminph_128(a, b) }
8556}
8557
8558/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8559/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8560/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8561/// NaN or signed-zero values.
8562///
8563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
8564#[inline]
8565#[target_feature(enable = "avx512fp16,avx512vl")]
8566#[cfg_attr(test, assert_instr(vminph))]
8567#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8568pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) }
8570}
8571
8572/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8573/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8574/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8575/// NaN or signed-zero values.
8576///
8577/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
8578#[inline]
8579#[target_feature(enable = "avx512fp16,avx512vl")]
8580#[cfg_attr(test, assert_instr(vminph))]
8581#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8582pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) }
8584}
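
// A small sketch of how the three masking flavours relate (the helper name and mask value
// are illustrative only): the unmasked, write-masked and zero-masked forms compute the same
// per-lane minimum and differ only in what happens to lanes whose mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn example_min_ph_masking(src: __m128h, a: __m128h, b: __m128h) -> (__m128h, __m128h, __m128h) {
    let full = _mm_min_ph(a, b); // every lane holds min(a[i], b[i])
    let merged = _mm_mask_min_ph(src, 0b0000_1111, a, b); // lanes 4..=7 are taken from `src`
    let zeroed = _mm_maskz_min_ph(0b0000_1111, a, b); // lanes 4..=7 are zeroed
    (full, merged, zeroed)
}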
8585
8586/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8587/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8588/// when inputs are NaN or signed-zero values.
8589///
8590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
8591#[inline]
8592#[target_feature(enable = "avx512fp16,avx512vl")]
8593#[cfg_attr(test, assert_instr(vminph))]
8594#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8595pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
8596 unsafe { vminph_256(a, b) }
8597}
8598
8599/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8600/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8601/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8602/// NaN or signed-zero values.
8603///
8604/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
8605#[inline]
8606#[target_feature(enable = "avx512fp16,avx512vl")]
8607#[cfg_attr(test, assert_instr(vminph))]
8608#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8609pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) }
8611}
8612
8613/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8614/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8615/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8616/// NaN or signed-zero values.
8617///
8618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
8619#[inline]
8620#[target_feature(enable = "avx512fp16,avx512vl")]
8621#[cfg_attr(test, assert_instr(vminph))]
8622#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8623pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) }
8625}
8626
8627/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8628/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8629/// when inputs are NaN or signed-zero values.
8630///
8631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
8632#[inline]
8633#[target_feature(enable = "avx512fp16")]
8634#[cfg_attr(test, assert_instr(vminph))]
8635#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8636pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
8637 _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
8638}
8639
8640/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8641/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8642/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8643/// NaN or signed-zero values.
8644///
8645/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
8646#[inline]
8647#[target_feature(enable = "avx512fp16")]
8648#[cfg_attr(test, assert_instr(vminph))]
8649#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8650pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) }
8652}
8653
8654/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8655/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8656/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8657/// NaN or signed-zero values.
8658///
8659/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
8660#[inline]
8661#[target_feature(enable = "avx512fp16")]
8662#[cfg_attr(test, assert_instr(vminph))]
8663#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8664pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) }
8666}
8667
8668/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8669/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not
8670/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8671///
8672/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
8673#[inline]
8674#[target_feature(enable = "avx512fp16")]
8675#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8676#[rustc_legacy_const_generics(2)]
8677#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8678pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
8679 unsafe {
8680 static_assert_sae!(SAE);
8681 vminph_512(a, b, SAE)
8682 }
8683}
8684
8685/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8686/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8687/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8688/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8689///
8690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
8691#[inline]
8692#[target_feature(enable = "avx512fp16")]
8693#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8694#[rustc_legacy_const_generics(4)]
8695#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8696pub fn _mm512_mask_min_round_ph<const SAE: i32>(
8697 src: __m512h,
8698 k: __mmask32,
8699 a: __m512h,
8700 b: __m512h,
8701) -> __m512h {
8702 unsafe {
8703 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src)
8705 }
8706}
8707
8708/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8709/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8711/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8712///
8713/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
8714#[inline]
8715#[target_feature(enable = "avx512fp16")]
8716#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8717#[rustc_legacy_const_generics(3)]
8718#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8719pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8720 unsafe {
8721 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph())
8723 }
8724}
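
// Sketch for the 512-bit rounded variant (illustrative helper, assuming the caller has
// already checked for `avx512fp16` support): the SAE policy is a const generic, so the
// zero-masked form below stays a thin wrapper around the same `vminph` instruction.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_min512_no_exc(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    // Lanes with a clear mask bit are zeroed; no floating-point exceptions are raised.
    _mm512_maskz_min_round_ph::<_MM_FROUND_NO_EXC>(k, a, b)
}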
8725
8726/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8727/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8728/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when
8729/// inputs are NaN or signed-zero values.
8730///
8731/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
8732#[inline]
8733#[target_feature(enable = "avx512fp16,avx512vl")]
8734#[cfg_attr(test, assert_instr(vminsh))]
8735#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8736pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b)
8738}
8739
8740/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8741/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
8742/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
8743/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8744///
8745/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
8746#[inline]
8747#[target_feature(enable = "avx512fp16,avx512vl")]
8748#[cfg_attr(test, assert_instr(vminsh))]
8749#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8750pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8751 _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8752}
8753
8754/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8755/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8756/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8757/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8758///
8759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
8760#[inline]
8761#[target_feature(enable = "avx512fp16,avx512vl")]
8762#[cfg_attr(test, assert_instr(vminsh))]
8763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8764pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_sh(_mm_setzero_ph(), k, a, b)
8766}
8767
8768/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8769/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8771/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8772///
8773/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
8774#[inline]
8775#[target_feature(enable = "avx512fp16,avx512vl")]
8776#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8777#[rustc_legacy_const_generics(2)]
8778#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8779pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8780 static_assert_sae!(SAE);
    _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
8782}
8783
8784/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8785/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
8786/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8787/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8788/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8789///
8790/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
8791#[inline]
8792#[target_feature(enable = "avx512fp16,avx512vl")]
8793#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8794#[rustc_legacy_const_generics(4)]
8795#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8796pub fn _mm_mask_min_round_sh<const SAE: i32>(
8797 src: __m128h,
8798 k: __mmask8,
8799 a: __m128h,
8800 b: __m128h,
8801) -> __m128h {
8802 unsafe {
8803 static_assert_sae!(SAE);
8804 vminsh(a, b, src, k, SAE)
8805 }
8806}
8807
8808/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8809/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8810/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8811/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8812/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8813///
8814/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
8815#[inline]
8816#[target_feature(enable = "avx512fp16,avx512vl")]
8817#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8818#[rustc_legacy_const_generics(3)]
8819#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8820pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8821 static_assert_sae!(SAE);
    _mm_mask_min_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
8823}
8824
8825/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8826/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8827/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8828///
8829/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
8830#[inline]
8831#[target_feature(enable = "avx512fp16,avx512vl")]
8832#[cfg_attr(test, assert_instr(vgetexpph))]
8833#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8834pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
    _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
8836}
8837
8838/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8839/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8840/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8841/// `floor(log2(x))` for each element.
8842///
8843/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
8844#[inline]
8845#[target_feature(enable = "avx512fp16,avx512vl")]
8846#[cfg_attr(test, assert_instr(vgetexpph))]
8847#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8848pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
8849 unsafe { vgetexpph_128(a, src, k) }
8850}
8851
8852/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8853/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8854/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8855/// `floor(log2(x))` for each element.
8856///
8857/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
8858#[inline]
8859#[target_feature(enable = "avx512fp16,avx512vl")]
8860#[cfg_attr(test, assert_instr(vgetexpph))]
8861#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8862pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
8864}
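
// Rough usage sketch (hypothetical helper): `_mm_getexp_ph` returns, per lane, the
// unbiased exponent as a half-precision value, i.e. roughly `floor(log2(|x|))`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn example_getexp_ph() -> __m128h {
    // Lane 0 holds 2.0 and lane 1 holds 8.0, so the result has 1.0 in lane 0 and
    // 3.0 in lane 1 (the remaining lanes hold getexp(1.0) == 0.0).
    let a = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 8.0, 2.0);
    _mm_getexp_ph(a)
}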
8865
8866/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8867/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8868/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8869///
8870/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
8871#[inline]
8872#[target_feature(enable = "avx512fp16,avx512vl")]
8873#[cfg_attr(test, assert_instr(vgetexpph))]
8874#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8875pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
    _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
8877}
8878
8879/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8880/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8881/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8882/// `floor(log2(x))` for each element.
8883///
8884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
8885#[inline]
8886#[target_feature(enable = "avx512fp16,avx512vl")]
8887#[cfg_attr(test, assert_instr(vgetexpph))]
8888#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8889pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8890 unsafe { vgetexpph_256(a, src, k) }
8891}
8892
8893/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8894/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8895/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8896/// `floor(log2(x))` for each element.
8897///
8898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
8899#[inline]
8900#[target_feature(enable = "avx512fp16,avx512vl")]
8901#[cfg_attr(test, assert_instr(vgetexpph))]
8902#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8903pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
8905}
8906
8907/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8908/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8909/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8910///
8911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
8912#[inline]
8913#[target_feature(enable = "avx512fp16")]
8914#[cfg_attr(test, assert_instr(vgetexpph))]
8915#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8916pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
    _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
8918}
8919
8920/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8921/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8922/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8923/// `floor(log2(x))` for each element.
8924///
8925/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
8926#[inline]
8927#[target_feature(enable = "avx512fp16")]
8928#[cfg_attr(test, assert_instr(vgetexpph))]
8929#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8930pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8931 _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
8932}
8933
8934/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8935/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8936/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8937/// `floor(log2(x))` for each element.
8938///
8939/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
8940#[inline]
8941#[target_feature(enable = "avx512fp16")]
8942#[cfg_attr(test, assert_instr(vgetexpph))]
8943#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8944pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
8946}
8947
8948/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8949/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8950/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
/// by passing _MM_FROUND_NO_EXC in the sae parameter.
8952///
8953/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
8954#[inline]
8955#[target_feature(enable = "avx512fp16")]
8956#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
8957#[rustc_legacy_const_generics(1)]
8958#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8959pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
8960 static_assert_sae!(SAE);
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
8962}
8963
8964/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8965/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8966/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8968///
8969/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
8970#[inline]
8971#[target_feature(enable = "avx512fp16")]
8972#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
8973#[rustc_legacy_const_generics(3)]
8974#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8975pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
8976 src: __m512h,
8977 k: __mmask32,
8978 a: __m512h,
8979) -> __m512h {
8980 unsafe {
8981 static_assert_sae!(SAE);
8982 vgetexpph_512(a, src, k, SAE)
8983 }
8984}
8985
8986/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8987/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8988/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8990///
8991/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
8992#[inline]
8993#[target_feature(enable = "avx512fp16")]
8994#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
8995#[rustc_legacy_const_generics(2)]
8996#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8997pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
8998 static_assert_sae!(SAE);
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
9000}
9001
9002/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9003/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9004/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9005/// calculates `floor(log2(x))` for the lower element.
9006///
9007/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
9008#[inline]
9009#[target_feature(enable = "avx512fp16")]
9010#[cfg_attr(test, assert_instr(vgetexpsh))]
9011#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9012pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_getexp_sh(_mm_undefined_ph(), 0xff, a, b)
9014}
9015
9016/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9017/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9018/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9019/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9020/// for the lower element.
9021///
9022/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
9023#[inline]
9024#[target_feature(enable = "avx512fp16")]
9025#[cfg_attr(test, assert_instr(vgetexpsh))]
9026#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9027pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9028 _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9029}
9030
9031/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9032/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9033/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9034/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9035/// lower element.
9036///
9037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
9038#[inline]
9039#[target_feature(enable = "avx512fp16")]
9040#[cfg_attr(test, assert_instr(vgetexpsh))]
9041#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9042pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_getexp_sh(_mm_setzero_ph(), k, a, b)
9044}
9045
9046/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9047/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9048/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9049/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
9051///
9052/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
9053#[inline]
9054#[target_feature(enable = "avx512fp16")]
9055#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9056#[rustc_legacy_const_generics(2)]
9057#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9058pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
9059 static_assert_sae!(SAE);
    _mm_mask_getexp_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
9061}
9062
9063/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9064/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9065/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9066/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9068///
9069/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
9070#[inline]
9071#[target_feature(enable = "avx512fp16")]
9072#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9073#[rustc_legacy_const_generics(4)]
9074#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9075pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
9076 src: __m128h,
9077 k: __mmask8,
9078 a: __m128h,
9079 b: __m128h,
9080) -> __m128h {
9081 unsafe {
9082 static_assert_sae!(SAE);
9083 vgetexpsh(a, b, src, k, SAE)
9084 }
9085}
9086
9087/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9088/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9089/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9090/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9092///
9093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
9094#[inline]
9095#[target_feature(enable = "avx512fp16")]
9096#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9097#[rustc_legacy_const_generics(3)]
9098#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9099pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9100 static_assert_sae!(SAE);
    _mm_mask_getexp_round_sh::<SAE>(_mm_setzero_ph(), k, a, b)
9102}
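
// Sketch of the scalar form (helper name is illustrative): the operand is read from `b`
// while the upper seven lanes of `a` pass through untouched, which is convenient when
// splicing a single freshly computed exponent into an existing vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_getexp_sh(a: __m128h) -> __m128h {
    // Lane 0 becomes getexp(4.0) == 2.0; lanes 1..=7 are copied from `a`.
    _mm_getexp_sh(a, _mm_set_sh(4.0))
}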
9103
9104/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9105/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9106/// on the interval range defined by norm and the sign depends on sign and the source sign.
9107///
9108/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9109///
9110/// _MM_MANT_NORM_1_2 // interval [1, 2)
9111/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9112/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9113/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9114///
9115/// The sign is determined by sc which can take the following values:
9116///
9117/// _MM_MANT_SIGN_src // sign = sign(src)
9118/// _MM_MANT_SIGN_zero // sign = 0
9119/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9120///
9121/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
9122#[inline]
9123#[target_feature(enable = "avx512fp16,avx512vl")]
9124#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9125#[rustc_legacy_const_generics(1, 2)]
9126#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9127pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9128 a: __m128h,
9129) -> __m128h {
9130 static_assert_uimm_bits!(NORM, 4);
9131 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
9133}
9134
9135/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9136/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9137/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9138/// by norm and the sign depends on sign and the source sign.
9139///
9140/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9141///
9142/// _MM_MANT_NORM_1_2 // interval [1, 2)
9143/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9144/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9145/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9146///
9147/// The sign is determined by sc which can take the following values:
9148///
9149/// _MM_MANT_SIGN_src // sign = sign(src)
9150/// _MM_MANT_SIGN_zero // sign = 0
9151/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9152///
9153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
9154#[inline]
9155#[target_feature(enable = "avx512fp16,avx512vl")]
9156#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9157#[rustc_legacy_const_generics(3, 4)]
9158#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9159pub fn _mm_mask_getmant_ph<
9160 const NORM: _MM_MANTISSA_NORM_ENUM,
9161 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9162>(
9163 src: __m128h,
9164 k: __mmask8,
9165 a: __m128h,
9166) -> __m128h {
9167 unsafe {
9168 static_assert_uimm_bits!(NORM, 4);
9169 static_assert_uimm_bits!(SIGN, 2);
9170 vgetmantph_128(a, (SIGN << 2) | NORM, src, k)
9171 }
9172}
9173
9174/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9175/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9176/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9177/// by norm and the sign depends on sign and the source sign.
9178///
9179/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9180///
9181/// _MM_MANT_NORM_1_2 // interval [1, 2)
9182/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9183/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9184/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9185///
9186/// The sign is determined by sc which can take the following values:
9187///
9188/// _MM_MANT_SIGN_src // sign = sign(src)
9189/// _MM_MANT_SIGN_zero // sign = 0
9190/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9191///
9192/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
9193#[inline]
9194#[target_feature(enable = "avx512fp16,avx512vl")]
9195#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9196#[rustc_legacy_const_generics(2, 3)]
9197#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9198pub fn _mm_maskz_getmant_ph<
9199 const NORM: _MM_MANTISSA_NORM_ENUM,
9200 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9201>(
9202 k: __mmask8,
9203 a: __m128h,
9204) -> __m128h {
9205 static_assert_uimm_bits!(NORM, 4);
9206 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
9208}
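
// Sketch showing how the NORM/SIGN knobs are supplied as const generics (the `_MM_MANT_*`
// constant names are assumed to be the ones exported by the AVX-512F module; the helper
// itself is illustrative): normalize every mantissa into [1, 2) and force the sign positive.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn example_getmant_ph(a: __m128h) -> __m128h {
    // For example, a lane holding -12.0 (i.e. -1.5 * 2^3) comes back as +1.5.
    _mm_getmant_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a)
}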
9209
9210/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9211/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9212/// on the interval range defined by norm and the sign depends on sign and the source sign.
9213///
9214/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9215///
9216/// _MM_MANT_NORM_1_2 // interval [1, 2)
9217/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9218/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9219/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9220///
9221/// The sign is determined by sc which can take the following values:
9222///
9223/// _MM_MANT_SIGN_src // sign = sign(src)
9224/// _MM_MANT_SIGN_zero // sign = 0
9225/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9226///
9227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
9228#[inline]
9229#[target_feature(enable = "avx512fp16,avx512vl")]
9230#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9231#[rustc_legacy_const_generics(1, 2)]
9232#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9233pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9234 a: __m256h,
9235) -> __m256h {
9236 static_assert_uimm_bits!(NORM, 4);
9237 static_assert_uimm_bits!(SIGN, 2);
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
9239}
9240
9241/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9242/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9243/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9244/// by norm and the sign depends on sign and the source sign.
9245///
9246/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9247///
9248/// _MM_MANT_NORM_1_2 // interval [1, 2)
9249/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9250/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9251/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9252///
9253/// The sign is determined by sc which can take the following values:
9254///
9255/// _MM_MANT_SIGN_src // sign = sign(src)
9256/// _MM_MANT_SIGN_zero // sign = 0
9257/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9258///
9259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
9260#[inline]
9261#[target_feature(enable = "avx512fp16,avx512vl")]
9262#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9263#[rustc_legacy_const_generics(3, 4)]
9264#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9265pub fn _mm256_mask_getmant_ph<
9266 const NORM: _MM_MANTISSA_NORM_ENUM,
9267 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9268>(
9269 src: __m256h,
9270 k: __mmask16,
9271 a: __m256h,
9272) -> __m256h {
9273 unsafe {
9274 static_assert_uimm_bits!(NORM, 4);
9275 static_assert_uimm_bits!(SIGN, 2);
9276 vgetmantph_256(a, (SIGN << 2) | NORM, src, k)
9277 }
9278}
9279
9280/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9281/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9282/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9283/// by norm and the sign depends on sign and the source sign.
9284///
9285/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9286///
9287/// _MM_MANT_NORM_1_2 // interval [1, 2)
9288/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9289/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9290/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9291///
9292/// The sign is determined by sc which can take the following values:
9293///
9294/// _MM_MANT_SIGN_src // sign = sign(src)
9295/// _MM_MANT_SIGN_zero // sign = 0
9296/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9297///
9298/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
9299#[inline]
9300#[target_feature(enable = "avx512fp16,avx512vl")]
9301#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9302#[rustc_legacy_const_generics(2, 3)]
9303#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9304pub fn _mm256_maskz_getmant_ph<
9305 const NORM: _MM_MANTISSA_NORM_ENUM,
9306 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9307>(
9308 k: __mmask16,
9309 a: __m256h,
9310) -> __m256h {
9311 static_assert_uimm_bits!(NORM, 4);
9312 static_assert_uimm_bits!(SIGN, 2);
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
9314}
9315
9316/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9317/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9318/// on the interval range defined by norm and the sign depends on sign and the source sign.
9319///
9320/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9321///
9322/// _MM_MANT_NORM_1_2 // interval [1, 2)
9323/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9324/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9325/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9326///
9327/// The sign is determined by sc which can take the following values:
9328///
9329/// _MM_MANT_SIGN_src // sign = sign(src)
9330/// _MM_MANT_SIGN_zero // sign = 0
9331/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9332///
9333/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
9334#[inline]
9335#[target_feature(enable = "avx512fp16")]
9336#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9337#[rustc_legacy_const_generics(1, 2)]
9338#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9339pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9340 a: __m512h,
9341) -> __m512h {
9342 static_assert_uimm_bits!(NORM, 4);
9343 static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
9345}
9346
9347/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9348/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9349/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9350/// by norm and the sign depends on sign and the source sign.
9351///
9352/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9353///
9354/// _MM_MANT_NORM_1_2 // interval [1, 2)
9355/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9356/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9357/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9358///
9359/// The sign is determined by sc which can take the following values:
9360///
9361/// _MM_MANT_SIGN_src // sign = sign(src)
9362/// _MM_MANT_SIGN_zero // sign = 0
9363/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9364///
9365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
9366#[inline]
9367#[target_feature(enable = "avx512fp16")]
9368#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9369#[rustc_legacy_const_generics(3, 4)]
9370#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9371pub fn _mm512_mask_getmant_ph<
9372 const NORM: _MM_MANTISSA_NORM_ENUM,
9373 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9374>(
9375 src: __m512h,
9376 k: __mmask32,
9377 a: __m512h,
9378) -> __m512h {
9379 static_assert_uimm_bits!(NORM, 4);
9380 static_assert_uimm_bits!(SIGN, 2);
9381 _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
9382}
9383
9384/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9385/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9386/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9387/// by norm and the sign depends on sign and the source sign.
9388///
9389/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9390///
9391/// _MM_MANT_NORM_1_2 // interval [1, 2)
9392/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9393/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9394/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9395///
9396/// The sign is determined by sc which can take the following values:
9397///
9398/// _MM_MANT_SIGN_src // sign = sign(src)
9399/// _MM_MANT_SIGN_zero // sign = 0
9400/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9401///
9402/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
9403#[inline]
9404#[target_feature(enable = "avx512fp16")]
9405#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9406#[rustc_legacy_const_generics(2, 3)]
9407#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9408pub fn _mm512_maskz_getmant_ph<
9409 const NORM: _MM_MANTISSA_NORM_ENUM,
9410 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9411>(
9412 k: __mmask32,
9413 a: __m512h,
9414) -> __m512h {
9415 static_assert_uimm_bits!(NORM, 4);
9416 static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
9418}
9419
9420/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9421/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9422/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9424///
9425/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9426///
9427/// _MM_MANT_NORM_1_2 // interval [1, 2)
9428/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9429/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9430/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9431///
9432/// The sign is determined by sc which can take the following values:
9433///
9434/// _MM_MANT_SIGN_src // sign = sign(src)
9435/// _MM_MANT_SIGN_zero // sign = 0
9436/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9439///
9440/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
9441#[inline]
9442#[target_feature(enable = "avx512fp16")]
9443#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9444#[rustc_legacy_const_generics(1, 2, 3)]
9445#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9446pub fn _mm512_getmant_round_ph<
9447 const NORM: _MM_MANTISSA_NORM_ENUM,
9448 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9449 const SAE: i32,
9450>(
9451 a: __m512h,
9452) -> __m512h {
9453 static_assert_uimm_bits!(NORM, 4);
9454 static_assert_uimm_bits!(SIGN, 2);
9455 static_assert_sae!(SAE);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
9457}
9458
9459/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9460/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9461/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9462/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
9464///
9465/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9466///
9467/// _MM_MANT_NORM_1_2 // interval [1, 2)
9468/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9469/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9470/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9471///
9472/// The sign is determined by sc which can take the following values:
9473///
9474/// _MM_MANT_SIGN_src // sign = sign(src)
9475/// _MM_MANT_SIGN_zero // sign = 0
9476/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9479///
9480/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
9481#[inline]
9482#[target_feature(enable = "avx512fp16")]
9483#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9484#[rustc_legacy_const_generics(3, 4, 5)]
9485#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9486pub fn _mm512_mask_getmant_round_ph<
9487 const NORM: _MM_MANTISSA_NORM_ENUM,
9488 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9489 const SAE: i32,
9490>(
9491 src: __m512h,
9492 k: __mmask32,
9493 a: __m512h,
9494) -> __m512h {
9495 unsafe {
9496 static_assert_uimm_bits!(NORM, 4);
9497 static_assert_uimm_bits!(SIGN, 2);
9498 static_assert_sae!(SAE);
9499 vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE)
9500 }
9501}
9502
9503/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9504/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9505/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9506/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
9508///
9509/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9510///
9511/// _MM_MANT_NORM_1_2 // interval [1, 2)
9512/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9513/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9514/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9515///
9516/// The sign is determined by sc which can take the following values:
9517///
9518/// _MM_MANT_SIGN_src // sign = sign(src)
9519/// _MM_MANT_SIGN_zero // sign = 0
9520/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9523///
9524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
9525#[inline]
9526#[target_feature(enable = "avx512fp16")]
9527#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9528#[rustc_legacy_const_generics(2, 3, 4)]
9529#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9530pub fn _mm512_maskz_getmant_round_ph<
9531 const NORM: _MM_MANTISSA_NORM_ENUM,
9532 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9533 const SAE: i32,
9534>(
9535 k: __mmask32,
9536 a: __m512h,
9537) -> __m512h {
9538 static_assert_uimm_bits!(NORM, 4);
9539 static_assert_uimm_bits!(SIGN, 2);
9540 static_assert_sae!(SAE);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
9542}
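
// Sketch for the 512-bit rounded variant (helper name is hypothetical; the `_MM_MANT_*`
// constant names are assumed from the AVX-512F module): NORM, SIGN and the SAE policy are
// all const generics, so exception suppression composes with the mantissa/sign selection
// at compile time.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_getmant_round_ph(a: __m512h) -> __m512h {
    // Mantissas land in [1, 2), each lane keeps the sign of its source, and no
    // floating-point exceptions are raised.
    _mm512_getmant_round_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a)
}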
9543
9544/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9545/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9546/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9547/// on the interval range defined by norm and the sign depends on sign and the source sign.
9548///
9549/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9550///
9551/// _MM_MANT_NORM_1_2 // interval [1, 2)
9552/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9553/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9554/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9555///
9556/// The sign is determined by sc which can take the following values:
9557///
9558/// _MM_MANT_SIGN_src // sign = sign(src)
9559/// _MM_MANT_SIGN_zero // sign = 0
9560/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9561///
9562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
9563#[inline]
9564#[target_feature(enable = "avx512fp16")]
9565#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9566#[rustc_legacy_const_generics(2, 3)]
9567#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9568pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9569 a: __m128h,
9570 b: __m128h,
9571) -> __m128h {
9572 static_assert_uimm_bits!(NORM, 4);
9573 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a, b)
9575}

/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
/// the source sign.
///
/// The mantissa is normalized to the interval specified by norm, which can take the following values:
///
///     _MM_MANT_NORM_1_2     // interval [1, 2)
///     _MM_MANT_NORM_p5_2    // interval [0.5, 2)
///     _MM_MANT_NORM_p5_1    // interval [0.5, 1)
///     _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
///
/// The sign is determined by sign, which can take the following values:
///
///     _MM_MANT_SIGN_src  // sign = sign(src)
///     _MM_MANT_SIGN_zero // sign = 0
///     _MM_MANT_SIGN_nan  // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getmant_sh<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
/// the source sign.
///
/// The mantissa is normalized to the interval specified by norm, which can take the following values:
///
///     _MM_MANT_NORM_1_2     // interval [1, 2)
///     _MM_MANT_NORM_p5_2    // interval [0.5, 2)
///     _MM_MANT_NORM_p5_1    // interval [0.5, 1)
///     _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
///
/// The sign is determined by sign, which can take the following values:
///
///     _MM_MANT_SIGN_src  // sign = sign(src)
///     _MM_MANT_SIGN_zero // sign = 0
///     _MM_MANT_SIGN_nan  // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_getmant_sh<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_sh::<NORM, SIGN>(_mm_setzero_ph(), k, a, b)
}

/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
/// on the interval range defined by norm and the sign depends on sign and the source sign.
///
/// The mantissa is normalized to the interval specified by norm, which can take the following values:
///
///     _MM_MANT_NORM_1_2     // interval [1, 2)
///     _MM_MANT_NORM_p5_2    // interval [0.5, 2)
///     _MM_MANT_NORM_p5_1    // interval [0.5, 1)
///     _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
///
/// The sign is determined by sign, which can take the following values:
///
///     _MM_MANT_SIGN_src  // sign = sign(src)
///     _MM_MANT_SIGN_zero // sign = 0
///     _MM_MANT_SIGN_nan  // dst = NaN if sign(src) = 1
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_getmant_round_sh<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_sae!(SAE);
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_undefined_ph(), 0xff, a, b)
}

/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
/// the source sign.
///
/// The mantissa is normalized to the interval specified by norm, which can take the following values:
///
///     _MM_MANT_NORM_1_2     // interval [1, 2)
///     _MM_MANT_NORM_p5_2    // interval [0.5, 2)
///     _MM_MANT_NORM_p5_1    // interval [0.5, 1)
///     _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
///
/// The sign is determined by sign, which can take the following values:
///
///     _MM_MANT_SIGN_src  // sign = sign(src)
///     _MM_MANT_SIGN_zero // sign = 0
///     _MM_MANT_SIGN_nan  // dst = NaN if sign(src) = 1
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5, 6)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getmant_round_sh<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_sae!(SAE);
        vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE)
    }
}

/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
/// the source sign.
///
/// The mantissa is normalized to the interval specified by norm, which can take the following values:
///
///     _MM_MANT_NORM_1_2     // interval [1, 2)
///     _MM_MANT_NORM_p5_2    // interval [0.5, 2)
///     _MM_MANT_NORM_p5_1    // interval [0.5, 1)
///     _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
///
/// The sign is determined by sign, which can take the following values:
///
///     _MM_MANT_SIGN_src  // sign = sign(src)
///     _MM_MANT_SIGN_zero // sign = 0
///     _MM_MANT_SIGN_nan  // dst = NaN if sign(src) = 1
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_getmant_round_sh<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    static_assert_sae!(SAE);
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(_mm_setzero_ph(), k, a, b)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        vrndscaleph_128(a, IMM8, src, k)
    }
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
}
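
// Illustrative sketch (hypothetical helpers): assuming the usual VRNDSCALE imm8 encoding, the
// upper four bits of IMM8 select how many fraction bits to keep (M) and the lower bits select
// the rounding mode, i.e. dst = 2^-M * round(a * 2^M). IMM8 = 0 therefore rounds every f16
// lane to the nearest integer.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn round_to_nearest_integer(a: __m128h) -> __m128h {
    _mm_roundscale_ph::<0>(a)
}

// Keeping one fraction bit while truncating rounds each lane toward zero to a multiple of 0.5,
// e.g. 1.75 becomes 1.5.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn truncate_to_halves(a: __m128h) -> __m128h {
    _mm_roundscale_ph::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a)
}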

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
    src: __m256h,
    k: __mmask16,
    a: __m256h,
) -> __m256h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        vrndscaleph_256(a, IMM8, src, k)
    }
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        vrndscaleph_512(a, IMM8, src, k, SAE)
    }
}

/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
}
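
// Illustrative sketch (hypothetical helper): the *_round_* variants additionally take an SAE
// parameter; passing _MM_FROUND_NO_EXC rounds all 32 lanes to the nearest integer without
// raising floating-point exceptions.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn round_all_lanes_quietly(a: __m512h) -> __m512h {
    _mm512_roundscale_round_ph::<0, { _MM_FROUND_NO_EXC }>(a)
}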

/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
/// from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
}

/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
}

/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
/// from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
}

/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        vrndscalesh(a, b, src, k, IMM8, SAE)
    }
}

/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { vscalefph_128(a, b, src, k) }
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
}
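
// Illustrative sketch (hypothetical helper): VSCALEF computes a * 2^floor(b) per lane, so it
// applies a per-lane power-of-two scale without first converting the exponents to integers.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn scale_by_powers_of_two(values: __m128h, exponents: __m128h) -> __m128h {
    // e.g. values[0] = 1.5, exponents[0] = 3.0  =>  result[0] = 1.5 * 2^3 = 12.0
    _mm_scalef_ph(values, exponents)
}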

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { vscalefph_256(a, b, src, k) }
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
}
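
// Illustrative sketch (hypothetical helper) of the mask/maskz convention used throughout this
// module: with a writemask, lanes whose mask bit is clear keep the value from `src`; with a
// zeromask they are zeroed instead.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn masked_scalef_demo(src: __m256h, a: __m256h, b: __m256h) -> (__m256h, __m256h) {
    let k: __mmask16 = 0b0000_0000_1111_0000; // operate only on lanes 4..=7
    (
        _mm256_mask_scalef_ph(src, k, a, b), // lanes outside the mask come from `src`
        _mm256_maskz_scalef_ph(k, a, b),     // lanes outside the mask are zeroed
    )
}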

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vscalefph_512(a, b, src, k, ROUNDING)
    }
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
    k: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
/// elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_sh(_mm_undefined_ph(), 0xff, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_sh(_mm_setzero_ph(), k, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
/// elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vscalefsh(a, b, src, k, ROUNDING)
    }
}

/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_scalef_round_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
}
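
// Illustrative sketch (hypothetical helper): the *_round_* scalar variant takes an explicit
// rounding mode instead of reading MXCSR. Here the lower lane of `a` is scaled by
// 2^floor(b[0]) with round-to-nearest and exceptions suppressed, while the upper 7 lanes are
// copied from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn scalef_sh_nearest_no_exc(a: __m128h, b: __m128h) -> __m128h {
    _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
}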

/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
/// number of bits specified by imm8, and store the results in dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
}

/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        vreduceph_128(a, IMM8, src, k)
    }
}

/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
}
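
// Illustrative sketch (hypothetical helper): VREDUCE returns the "reduced argument"
// a - roundscale(a), so with IMM8 = 0 each lane holds the signed difference between the value
// and its nearest integer.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn distance_to_nearest_integer(a: __m128h) -> __m128h {
    // e.g. a lane holding 2.75 becomes 2.75 - 3.0 = -0.25
    _mm_reduce_ph::<0>(a)
}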
10616
10617/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10618/// number of bits specified by imm8, and store the results in dst.
10619///
10620/// Rounding is done according to the imm8 parameter, which can be one of:
10621///
10622/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10623/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10624/// * [`_MM_FROUND_TO_POS_INF`] : round up
10625/// * [`_MM_FROUND_TO_ZERO`] : truncate
10626/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10627///
10628/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
10629#[inline]
10630#[target_feature(enable = "avx512fp16,avx512vl")]
10631#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10632#[rustc_legacy_const_generics(1)]
10633#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10634pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
10635 static_assert_uimm_bits!(IMM8, 8);
10636 _mm256_mask_reduce_ph::<IMM8>(src:_mm256_undefined_ph(), k:0xffff, a)
10637}
10638
10639/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10640/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10641/// from src when the corresponding mask bit is not set).
10642///
10643/// Rounding is done according to the imm8 parameter, which can be one of:
10644///
10645/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10646/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10647/// * [`_MM_FROUND_TO_POS_INF`] : round up
10648/// * [`_MM_FROUND_TO_ZERO`] : truncate
10649/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10650///
10651/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
10652#[inline]
10653#[target_feature(enable = "avx512fp16,avx512vl")]
10654#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10655#[rustc_legacy_const_generics(3)]
10656#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10657pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
10658 unsafe {
10659 static_assert_uimm_bits!(IMM8, 8);
10660 vreduceph_256(a, IMM8, src, k)
10661 }
10662}
10663
10664/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10665/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10666/// out when the corresponding mask bit is not set).
10667///
10668/// Rounding is done according to the imm8 parameter, which can be one of:
10669///
10670/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10671/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10672/// * [`_MM_FROUND_TO_POS_INF`] : round up
10673/// * [`_MM_FROUND_TO_ZERO`] : truncate
10674/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10675///
10676/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
10677#[inline]
10678#[target_feature(enable = "avx512fp16,avx512vl")]
10679#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10680#[rustc_legacy_const_generics(2)]
10681#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10682pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10683 static_assert_uimm_bits!(IMM8, 8);
10684 _mm256_mask_reduce_ph::<IMM8>(src:_mm256_setzero_ph(), k, a)
10685}
10686
10687/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10688/// number of bits specified by imm8, and store the results in dst.
10689///
10690/// Rounding is done according to the imm8 parameter, which can be one of:
10691///
10692/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10693/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10694/// * [`_MM_FROUND_TO_POS_INF`] : round up
10695/// * [`_MM_FROUND_TO_ZERO`] : truncate
10696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10697///
10698/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
10699#[inline]
10700#[target_feature(enable = "avx512fp16")]
10701#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10702#[rustc_legacy_const_generics(1)]
10703#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10704pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
10705 static_assert_uimm_bits!(IMM8, 8);
10706 _mm512_mask_reduce_ph::<IMM8>(src:_mm512_undefined_ph(), k:0xffffffff, a)
10707}
10708
10709/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10710/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10711/// from src when the corresponding mask bit is not set).
10712///
10713/// Rounding is done according to the imm8 parameter, which can be one of:
10714///
10715/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10716/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10717/// * [`_MM_FROUND_TO_POS_INF`] : round up
10718/// * [`_MM_FROUND_TO_ZERO`] : truncate
10719/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10720///
10721/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
10722#[inline]
10723#[target_feature(enable = "avx512fp16")]
10724#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10725#[rustc_legacy_const_generics(3)]
10726#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10727pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
10728 static_assert_uimm_bits!(IMM8, 8);
10729 _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
10730}
10731
10732/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10733/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10734/// out when the corresponding mask bit is not set).
10735///
10736/// Rounding is done according to the imm8 parameter, which can be one of:
10737///
10738/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10739/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10740/// * [`_MM_FROUND_TO_POS_INF`] : round up
10741/// * [`_MM_FROUND_TO_ZERO`] : truncate
10742/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10743///
10744/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
10745#[inline]
10746#[target_feature(enable = "avx512fp16")]
10747#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10748#[rustc_legacy_const_generics(2)]
10749#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10750pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10751 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10753}
10754
10755/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10756/// number of bits specified by imm8, and store the results in dst.
10757///
10758/// Rounding is done according to the imm8 parameter, which can be one of:
10759///
10760/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10761/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10762/// * [`_MM_FROUND_TO_POS_INF`] : round up
10763/// * [`_MM_FROUND_TO_ZERO`] : truncate
10764/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10765///
10766/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10767///
10768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
10769#[inline]
10770#[target_feature(enable = "avx512fp16")]
10771#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10772#[rustc_legacy_const_generics(1, 2)]
10773#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10774pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10775 static_assert_uimm_bits!(IMM8, 8);
10776 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10778}
10779
10780/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10781/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10782/// from src when the corresponding mask bit is not set).
10783///
10784/// Rounding is done according to the imm8 parameter, which can be one of:
10785///
10786/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10787/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10788/// * [`_MM_FROUND_TO_POS_INF`] : round up
10789/// * [`_MM_FROUND_TO_ZERO`] : truncate
10790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10791///
10792/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10793///
10794/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
10795#[inline]
10796#[target_feature(enable = "avx512fp16")]
10797#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10798#[rustc_legacy_const_generics(3, 4)]
10799#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10800pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10801 src: __m512h,
10802 k: __mmask32,
10803 a: __m512h,
10804) -> __m512h {
10805 unsafe {
10806 static_assert_uimm_bits!(IMM8, 8);
10807 static_assert_sae!(SAE);
10808 vreduceph_512(a, IMM8, src, k, SAE)
10809 }
10810}
10811
10812/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10813/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10814/// out when the corresponding mask bit is not set).
10815///
10816/// Rounding is done according to the imm8 parameter, which can be one of:
10817///
10818/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10819/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10820/// * [`_MM_FROUND_TO_POS_INF`] : round up
10821/// * [`_MM_FROUND_TO_ZERO`] : truncate
10822/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10823///
10824/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10825///
10826/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
10827#[inline]
10828#[target_feature(enable = "avx512fp16")]
10829#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10830#[rustc_legacy_const_generics(2, 3)]
10831#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10832pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10833 k: __mmask32,
10834 a: __m512h,
10835) -> __m512h {
10836 static_assert_uimm_bits!(IMM8, 8);
10837 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10839}
10840
10841/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10842/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
10843/// upper 7 packed elements from a to the upper elements of dst.
10844///
10845/// Rounding is done according to the imm8 parameter, which can be one of:
10846///
10847/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10848/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10849/// * [`_MM_FROUND_TO_POS_INF`] : round up
10850/// * [`_MM_FROUND_TO_ZERO`] : truncate
10851/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10852///
10853/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
10854#[inline]
10855#[target_feature(enable = "avx512fp16")]
10856#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
10857#[rustc_legacy_const_generics(2)]
10858#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10859pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10860 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sh::<IMM8>(_mm_undefined_ph(), 0xff, a, b)
10862}
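
// Sketch of the scalar form (hypothetical helper): only lane 0 of `b` is reduced; the
// remaining seven lanes are copied from `a`, as described in the doc comment above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn reduce_sh_example(a: __m128h, b: __m128h) -> __m128h {
    // dst[0] = b[0] - trunc(b[0]); dst[1..8] = a[1..8]
    _mm_reduce_sh::<0x03>(a, b)
}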
10863
10864/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10865/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
10866/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
10867/// a to the upper elements of dst.
10868///
10869/// Rounding is done according to the imm8 parameter, which can be one of:
10870///
10871/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10872/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10873/// * [`_MM_FROUND_TO_POS_INF`] : round up
10874/// * [`_MM_FROUND_TO_ZERO`] : truncate
10875/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10876///
10877/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
10878#[inline]
10879#[target_feature(enable = "avx512fp16")]
10880#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
10881#[rustc_legacy_const_generics(4)]
10882#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10883pub fn _mm_mask_reduce_sh<const IMM8: i32>(
10884 src: __m128h,
10885 k: __mmask8,
10886 a: __m128h,
10887 b: __m128h,
10888) -> __m128h {
10889 static_assert_uimm_bits!(IMM8, 8);
10890 _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10891}
10892
10893/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10894/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
10895/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
10896/// to the upper elements of dst.
10897///
10898/// Rounding is done according to the imm8 parameter, which can be one of:
10899///
10900/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10901/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10902/// * [`_MM_FROUND_TO_POS_INF`] : round up
10903/// * [`_MM_FROUND_TO_ZERO`] : truncate
10904/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10905///
10906/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
10907#[inline]
10908#[target_feature(enable = "avx512fp16")]
10909#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
10910#[rustc_legacy_const_generics(3)]
10911#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10912pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10913 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sh::<IMM8>(_mm_setzero_ph(), k, a, b)
10915}
10916
10917/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10918/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
10919/// 7 packed elements from a to the upper elements of dst.
10920///
10921/// Rounding is done according to the imm8 parameter, which can be one of:
10922///
10923/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10924/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10925/// * [`_MM_FROUND_TO_POS_INF`] : round up
10926/// * [`_MM_FROUND_TO_ZERO`] : truncate
10927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10928///
10929/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10930///
10931/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
10932#[inline]
10933#[target_feature(enable = "avx512fp16")]
10934#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
10935#[rustc_legacy_const_generics(2, 3)]
10936#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10937pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
10938 static_assert_uimm_bits!(IMM8, 8);
10939 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_undefined_ph(), 0xff, a, b)
10941}
10942
10943/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10944/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
10945/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
10946/// to the upper elements of dst.
10947///
10948/// Rounding is done according to the imm8 parameter, which can be one of:
10949///
10950/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10951/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10952/// * [`_MM_FROUND_TO_POS_INF`] : round up
10953/// * [`_MM_FROUND_TO_ZERO`] : truncate
10954/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10955///
10956/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10957///
10958/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
10959#[inline]
10960#[target_feature(enable = "avx512fp16")]
10961#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
10962#[rustc_legacy_const_generics(4, 5)]
10963#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10964pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
10965 src: __m128h,
10966 k: __mmask8,
10967 a: __m128h,
10968 b: __m128h,
10969) -> __m128h {
10970 unsafe {
10971 static_assert_uimm_bits!(IMM8, 8);
10972 static_assert_sae!(SAE);
10973 vreducesh(a, b, src, k, IMM8, SAE)
10974 }
10975}
10976
10977/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10978/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
10979/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
10980/// to the upper elements of dst.
10981///
10982/// Rounding is done according to the imm8 parameter, which can be one of:
10983///
10984/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10985/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10986/// * [`_MM_FROUND_TO_POS_INF`] : round up
10987/// * [`_MM_FROUND_TO_ZERO`] : truncate
10988/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10989///
10990/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10991///
10992/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
10993#[inline]
10994#[target_feature(enable = "avx512fp16")]
10995#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
10996#[rustc_legacy_const_generics(3, 4)]
10997#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10998pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
10999 k: __mmask8,
11000 a: __m128h,
11001 b: __m128h,
11002) -> __m128h {
11003 static_assert_uimm_bits!(IMM8, 8);
11004 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sh::<IMM8, SAE>(_mm_setzero_ph(), k, a, b)
11006}
11007
11008/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11009/// sum of all elements in a.
11010///
11011/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
11012#[inline]
11013#[target_feature(enable = "avx512fp16,avx512vl")]
11014#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11015pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
11016 unsafe {
11017 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11018 let a: __m128h = _mm_add_ph(a, b);
11019 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11020 let a: __m128h = _mm_add_ph(a, b);
        simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1)
11022 }
11023}
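
// Usage sketch (hypothetical helper, assuming `avx512fp16` and `avx512vl` are available):
// the shuffle/add tree above collapses the eight lanes into a single scalar sum.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn reduce_add_example() -> f16 {
    // 1 + 2 + ... + 8 = 36.0
    let v = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    _mm_reduce_add_ph(v)
}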
11024
11025/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11026/// sum of all elements in a.
11027///
11028/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
11029#[inline]
11030#[target_feature(enable = "avx512fp16,avx512vl")]
11031#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11032pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
11033 unsafe {
11034 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11035 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_add_ph(_mm_add_ph(p, q))
11037 }
11038}
11039
11040/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11041/// sum of all elements in a.
11042///
11043/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
11044#[inline]
11045#[target_feature(enable = "avx512fp16")]
11046#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11047pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
11048 unsafe {
11049 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11050 let q: __m256h = simd_shuffle!(
11051 a,
11052 a,
11053 [
11054 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11055 ]
11056 );
        _mm256_reduce_add_ph(_mm256_add_ph(p, q))
11058 }
11059}
11060
11061/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11062/// the product of all elements in a.
11063///
11064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
11065#[inline]
11066#[target_feature(enable = "avx512fp16,avx512vl")]
11067#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11068pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
11069 unsafe {
11070 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11071 let a: __m128h = _mm_mul_ph(a, b);
11072 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11073 let a: __m128h = _mm_mul_ph(a, b);
        simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1)
11075 }
11076}
11077
11078/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11079/// the product of all elements in a.
11080///
11081/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
11082#[inline]
11083#[target_feature(enable = "avx512fp16,avx512vl")]
11084#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11085pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
11086 unsafe {
11087 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11088 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_mul_ph(_mm_mul_ph(p, q))
11090 }
11091}
11092
11093/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11094/// the product of all elements in a.
11095///
11096/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
11097#[inline]
11098#[target_feature(enable = "avx512fp16")]
11099#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
11101 unsafe {
11102 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11103 let q: __m256h = simd_shuffle!(
11104 a,
11105 a,
11106 [
11107 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11108 ]
11109 );
        _mm256_reduce_mul_ph(_mm256_mul_ph(p, q))
11111 }
11112}
11113
11114/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11115/// minimum of all elements in a.
11116///
11117/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
11118#[inline]
11119#[target_feature(enable = "avx512fp16,avx512vl")]
11120#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11121pub fn _mm_reduce_min_ph(a: __m128h) -> f16 {
11122 unsafe {
11123 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11124 let a: __m128h = _mm_min_ph(a, b);
11125 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11126 let a: __m128h = _mm_min_ph(a, b);
11127 let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
11128 simd_extract!(_mm_min_sh(a, b), 0)
11129 }
11130}
11131
11132/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11133/// minimum of all elements in a.
11134///
11135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
11136#[inline]
11137#[target_feature(enable = "avx512fp16,avx512vl")]
11138#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11139pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
11140 unsafe {
11141 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11142 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_min_ph(_mm_min_ph(p, q))
11144 }
11145}
11146
11147/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11148/// minimum of all elements in a.
11149///
11150/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
11151#[inline]
11152#[target_feature(enable = "avx512fp16")]
11153#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11154pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
11155 unsafe {
11156 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11157 let q: __m256h = simd_shuffle!(
11158 a,
11159 a,
11160 [
11161 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11162 ]
11163 );
        _mm256_reduce_min_ph(_mm256_min_ph(p, q))
11165 }
11166}
11167
11168/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11169/// maximum of all elements in a.
11170///
11171/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
11172#[inline]
11173#[target_feature(enable = "avx512fp16,avx512vl")]
11174#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11175pub fn _mm_reduce_max_ph(a: __m128h) -> f16 {
11176 unsafe {
11177 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11178 let a: __m128h = _mm_max_ph(a, b);
11179 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11180 let a: __m128h = _mm_max_ph(a, b);
11181 let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
11182 simd_extract!(_mm_max_sh(a, b), 0)
11183 }
11184}
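
// Sketch (hypothetical helper): the same shuffle tree as the additive reduction, with the
// final pairwise step going through `_mm_max_sh` on the two remaining candidates.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn reduce_max_example() -> f16 {
    let v = _mm_set_ph(3.0, -8.0, 7.5, 0.5, 2.0, 6.25, -1.0, 4.0);
    // Expected maximum of the eight lanes: 7.5
    _mm_reduce_max_ph(v)
}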
11185
11186/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11187/// maximum of all elements in a.
11188///
11189/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
11190#[inline]
11191#[target_feature(enable = "avx512fp16,avx512vl")]
11192#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11193pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
11194 unsafe {
11195 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11196 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_max_ph(_mm_max_ph(p, q))
11198 }
11199}
11200
11201/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11202/// maximum of all elements in a.
11203///
11204/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
11205#[inline]
11206#[target_feature(enable = "avx512fp16")]
11207#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11208pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
11209 unsafe {
11210 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11211 let q: __m256h = simd_shuffle!(
11212 a,
11213 a,
11214 [
11215 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11216 ]
11217 );
        _mm256_reduce_max_ph(_mm256_max_ph(p, q))
11219 }
11220}
11221
11222macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
11223 ($mask_type: ty, $reg: ident, $a: expr) => {{
11224 let dst: $mask_type;
11225 asm!(
11226 "vfpclassph {k}, {src}, {imm8}",
11227 k = lateout(kreg) dst,
11228 src = in($reg) $a,
11229 imm8 = const IMM8,
11230 options(pure, nomem, nostack)
11231 );
11232 dst
11233 }};
11234 ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
11235 let dst: $mask_type;
11236 asm!(
11237 "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
11238 k = lateout(kreg) dst,
11239 mask = in(kreg) $mask,
11240 src = in($reg) $a,
11241 imm8 = const IMM8,
11242 options(pure, nomem, nostack)
11243 );
11244 dst
11245 }};
11246}
11247
11248/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11249/// by imm8, and store the results in mask vector k.
11250/// imm can be a combination of:
11251///
11252/// 0x01 // QNaN
11253/// 0x02 // Positive Zero
11254/// 0x04 // Negative Zero
11255/// 0x08 // Positive Infinity
11256/// 0x10 // Negative Infinity
11257/// 0x20 // Denormal
11258/// 0x40 // Negative
11259/// 0x80 // SNaN
11260///
11261/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
11262#[inline]
11263#[target_feature(enable = "avx512fp16,avx512vl")]
11264#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11265#[rustc_legacy_const_generics(1)]
11266#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11267pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
11268 unsafe {
11269 static_assert_uimm_bits!(IMM8, 8);
11270 fpclass_asm!(__mmask8, xmm_reg, a)
11271 }
11272}
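
// Sketch (hypothetical helper): the category bits listed above can be OR-ed together, so
// 0x81 (QNaN | SNaN) yields a lane mask of the NaN elements and 0x18 (positive | negative
// infinity) flags the infinite ones.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn fpclass_example(a: __m128h) -> (__mmask8, __mmask8) {
    let nan_mask = _mm_fpclass_ph_mask::<0x81>(a);
    let inf_mask = _mm_fpclass_ph_mask::<0x18>(a);
    (nan_mask, inf_mask)
}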
11273
11274/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11275/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11276/// corresponding mask bit is not set).
11277/// imm can be a combination of:
11278///
11279/// 0x01 // QNaN
11280/// 0x02 // Positive Zero
11281/// 0x04 // Negative Zero
11282/// 0x08 // Positive Infinity
11283/// 0x10 // Negative Infinity
11284/// 0x20 // Denormal
11285/// 0x40 // Negative
11286/// 0x80 // SNaN
11287///
11288/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
11289#[inline]
11290#[target_feature(enable = "avx512fp16,avx512vl")]
11291#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11292#[rustc_legacy_const_generics(2)]
11293#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11294pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11295 unsafe {
11296 static_assert_uimm_bits!(IMM8, 8);
11297 fpclass_asm!(__mmask8, k1, xmm_reg, a)
11298 }
11299}
11300
11301/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11302/// by imm8, and store the results in mask vector k.
11303/// imm can be a combination of:
11304///
11305/// 0x01 // QNaN
11306/// 0x02 // Positive Zero
11307/// 0x04 // Negative Zero
11308/// 0x08 // Positive Infinity
11309/// 0x10 // Negative Infinity
11310/// 0x20 // Denormal
11311/// 0x40 // Negative
11312/// 0x80 // SNaN
11313///
11314/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
11315#[inline]
11316#[target_feature(enable = "avx512fp16,avx512vl")]
11317#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11318#[rustc_legacy_const_generics(1)]
11319#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11320pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
11321 unsafe {
11322 static_assert_uimm_bits!(IMM8, 8);
11323 fpclass_asm!(__mmask16, ymm_reg, a)
11324 }
11325}
11326
11327/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11328/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11329/// corresponding mask bit is not set).
11330/// imm can be a combination of:
11331///
11332/// 0x01 // QNaN
11333/// 0x02 // Positive Zero
11334/// 0x04 // Negative Zero
11335/// 0x08 // Positive Infinity
11336/// 0x10 // Negative Infinity
11337/// 0x20 // Denormal
11338/// 0x40 // Negative
11339/// 0x80 // SNaN
11340///
11341/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
11342#[inline]
11343#[target_feature(enable = "avx512fp16,avx512vl")]
11344#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11345#[rustc_legacy_const_generics(2)]
11346#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11347pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
11348 unsafe {
11349 static_assert_uimm_bits!(IMM8, 8);
11350 fpclass_asm!(__mmask16, k1, ymm_reg, a)
11351 }
11352}
11353
11354/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11355/// by imm8, and store the results in mask vector k.
11356/// imm can be a combination of:
11357///
11358/// 0x01 // QNaN
11359/// 0x02 // Positive Zero
11360/// 0x04 // Negative Zero
11361/// 0x08 // Positive Infinity
11362/// 0x10 // Negative Infinity
11363/// 0x20 // Denormal
11364/// 0x40 // Negative
11365/// 0x80 // SNaN
11366///
11367/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
11368#[inline]
11369#[target_feature(enable = "avx512fp16")]
11370#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11371#[rustc_legacy_const_generics(1)]
11372#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11373pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
11374 unsafe {
11375 static_assert_uimm_bits!(IMM8, 8);
11376 fpclass_asm!(__mmask32, zmm_reg, a)
11377 }
11378}
11379
11380/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11381/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11382/// corresponding mask bit is not set).
11383/// imm can be a combination of:
11384///
11385/// 0x01 // QNaN
11386/// 0x02 // Positive Zero
11387/// 0x04 // Negative Zero
11388/// 0x08 // Positive Infinity
11389/// 0x10 // Negative Infinity
11390/// 0x20 // Denormal
11391/// 0x40 // Negative
11392/// 0x80 // SNaN
11393///
11394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
11395#[inline]
11396#[target_feature(enable = "avx512fp16")]
11397#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
11398#[rustc_legacy_const_generics(2)]
11399#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11400pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
11401 unsafe {
11402 static_assert_uimm_bits!(IMM8, 8);
11403 fpclass_asm!(__mmask32, k1, zmm_reg, a)
11404 }
11405}
11406
11407/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11408/// by imm8, and store the result in mask vector k.
11409/// imm can be a combination of:
11410///
11411/// 0x01 // QNaN
11412/// 0x02 // Positive Zero
11413/// 0x04 // Negative Zero
11414/// 0x08 // Positive Infinity
11415/// 0x10 // Negative Infinity
11416/// 0x20 // Denormal
11417/// 0x40 // Negative
11418/// 0x80 // SNaN
11419///
11420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
11421#[inline]
11422#[target_feature(enable = "avx512fp16")]
11423#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
11424#[rustc_legacy_const_generics(1)]
11425#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11426pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
    _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a)
11428}
11429
11430/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11431/// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the
11432/// corresponding mask bit is not set).
11433/// imm can be a combination of:
11434///
11435/// 0x01 // QNaN
11436/// 0x02 // Positive Zero
11437/// 0x04 // Negative Zero
11438/// 0x08 // Positive Infinity
11439/// 0x10 // Negative Infinity
11440/// 0x20 // Denormal
11441/// 0x40 // Negative
11442/// 0x80 // SNaN
11443///
11444/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
11445#[inline]
11446#[target_feature(enable = "avx512fp16")]
11447#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
11448#[rustc_legacy_const_generics(2)]
11449#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11450pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11451 unsafe {
11452 static_assert_uimm_bits!(IMM8, 8);
        vfpclasssh(a, IMM8, k1)
11454 }
11455}
11456
11457/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11458/// and store the results in dst.
11459///
11460/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
11461#[inline]
11462#[target_feature(enable = "avx512fp16,avx512vl")]
11463#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11464pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, b, a) }
11466}
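
// Sketch (hypothetical helper): bit i of `k` selects lane i of `b`; a cleared bit keeps
// lane i of `a`. With k = 0b0000_1111 the low four lanes come from `b`, the high four from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn blend_example(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_blend_ph(0b0000_1111, a, b)
}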
11467
11468/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11469/// and store the results in dst.
11470///
11471/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
11472#[inline]
11473#[target_feature(enable = "avx512fp16,avx512vl")]
11474#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11475pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, b, a) }
11477}
11478
11479/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11480/// and store the results in dst.
11481///
11482/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
11483#[inline]
11484#[target_feature(enable = "avx512fp16")]
11485#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11486pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, b, a) }
11488}
11489
11490/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11491/// and index in idx, and store the results in dst.
11492///
11493/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
11494#[inline]
11495#[target_feature(enable = "avx512fp16,avx512vl")]
11496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11497pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
    _mm_castsi128_ph(_mm_permutex2var_epi16(
        _mm_castph_si128(a),
        idx,
        _mm_castph_si128(b),
    ))
11503}
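
// Sketch (hypothetical helper, additionally enabling `sse2` for the integer set helper):
// indices 0..=7 pick lanes from `a` and 8..=15 pick lanes from `b`, so this index vector
// interleaves the low four lanes of the two inputs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl,sse2")]
fn permutex2var_interleave_example(a: __m128h, b: __m128h) -> __m128h {
    let idx = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
    // dst = [a0, b0, a1, b1, a2, b2, a3, b3]
    _mm_permutex2var_ph(a, idx, b)
}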
11504
11505/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11506/// and index in idx, and store the results in dst.
11507///
11508/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
11509#[inline]
11510#[target_feature(enable = "avx512fp16,avx512vl")]
11511#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11512pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
    _mm256_castsi256_ph(_mm256_permutex2var_epi16(
        _mm256_castph_si256(a),
        idx,
        _mm256_castph_si256(b),
    ))
11518}
11519
11520/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11521/// and index in idx, and store the results in dst.
11522///
11523/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
11524#[inline]
11525#[target_feature(enable = "avx512fp16")]
11526#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11527pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
    _mm512_castsi512_ph(_mm512_permutex2var_epi16(
        _mm512_castph_si512(a),
        idx,
        _mm512_castph_si512(b),
    ))
11533}
11534
11535/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11536/// and store the results in dst.
11537///
11538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
11539#[inline]
11540#[target_feature(enable = "avx512fp16,avx512vl")]
11541#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11542pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
    _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a)))
11544}
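
// Sketch (hypothetical helper, additionally enabling `sse2` for the integer set helper):
// lane i of the result is lane idx[i] of `a`, so the index vector 7, 6, ..., 0 reverses
// the element order.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl,sse2")]
fn permutexvar_reverse_example(a: __m128h) -> __m128h {
    let idx = _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0);
    _mm_permutexvar_ph(idx, a)
}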
11545
11546/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11547/// and store the results in dst.
11548///
11549/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
11550#[inline]
11551#[target_feature(enable = "avx512fp16,avx512vl")]
11552#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11553pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
    _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a)))
11555}
11556
11557/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11558/// and store the results in dst.
11559///
11560/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
11561#[inline]
11562#[target_feature(enable = "avx512fp16")]
11563#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11564pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
    _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
11566}
11567
11568/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11569/// and store the results in dst.
11570///
11571/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
11572#[inline]
11573#[target_feature(enable = "avx512fp16,avx512vl")]
11574#[cfg_attr(test, assert_instr(vcvtw2ph))]
11575#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11576pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
11577 unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) }
11578}
11579
11580/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11581/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11582/// mask bit is not set).
11583///
11584/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
11585#[inline]
11586#[target_feature(enable = "avx512fp16,avx512vl")]
11587#[cfg_attr(test, assert_instr(vcvtw2ph))]
11588#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11589pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) }
11591}
11592
11593/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11594/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11595///
11596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
11597#[inline]
11598#[target_feature(enable = "avx512fp16,avx512vl")]
11599#[cfg_attr(test, assert_instr(vcvtw2ph))]
11600#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11601pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a)
11603}
11604
11605/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11606/// and store the results in dst.
11607///
11608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
11609#[inline]
11610#[target_feature(enable = "avx512fp16,avx512vl")]
11611#[cfg_attr(test, assert_instr(vcvtw2ph))]
11612#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11613pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
11614 unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) }
11615}
11616
11617/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11618/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11619/// mask bit is not set).
11620///
11621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
11622#[inline]
11623#[target_feature(enable = "avx512fp16,avx512vl")]
11624#[cfg_attr(test, assert_instr(vcvtw2ph))]
11625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11626pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) }
11628}
11629
11630/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11631/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11632///
11633/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
11634#[inline]
11635#[target_feature(enable = "avx512fp16,avx512vl")]
11636#[cfg_attr(test, assert_instr(vcvtw2ph))]
11637#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11638pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
    _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a)
11640}
11641
11642/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11643/// and store the results in dst.
11644///
11645/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
11646#[inline]
11647#[target_feature(enable = "avx512fp16")]
11648#[cfg_attr(test, assert_instr(vcvtw2ph))]
11649#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11650pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
11651 unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) }
11652}
11653
11654/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11655/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11656/// mask bit is not set).
11657///
11658/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
11659#[inline]
11660#[target_feature(enable = "avx512fp16")]
11661#[cfg_attr(test, assert_instr(vcvtw2ph))]
11662#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11663pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) }
11665}
11666
11667/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11668/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11669///
11670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
11671#[inline]
11672#[target_feature(enable = "avx512fp16")]
11673#[cfg_attr(test, assert_instr(vcvtw2ph))]
11674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11675pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
    _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a)
11677}
11678
11679/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11680/// and store the results in dst.
11681///
11682/// Rounding is done according to the rounding parameter, which can be one of:
11683///
11684/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11685/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11686/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11687/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11688/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11689///
11690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
11691#[inline]
11692#[target_feature(enable = "avx512fp16")]
11693#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11694#[rustc_legacy_const_generics(1)]
11695#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11696pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11697 unsafe {
11698 static_assert_rounding!(ROUNDING);
11699 vcvtw2ph_512(a.as_i16x32(), ROUNDING)
11700 }
11701}
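
// Sketch (hypothetical helper): 16-bit integers above 2048 are not all exactly
// representable in f16, so the explicit rounding mode matters; here every lane is
// rounded toward zero with exceptions suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn cvt_roundepi16_example(a: __m512i) -> __m512h {
    _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a)
}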
11702
11703/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11704/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11705/// mask bit is not set).
11706///
11707/// Rounding is done according to the rounding parameter, which can be one of:
11708///
11709/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11710/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11711/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11712/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11713/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11714///
11715/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
11716#[inline]
11717#[target_feature(enable = "avx512fp16")]
11718#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11719#[rustc_legacy_const_generics(3)]
11720#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11721pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
11722 src: __m512h,
11723 k: __mmask32,
11724 a: __m512i,
11725) -> __m512h {
11726 unsafe {
11727 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::<ROUNDING>(a), src)
11729 }
11730}
11731
11732/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11733/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11734///
11735/// Rounding is done according to the rounding parameter, which can be one of:
11736///
11737/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11738/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11739/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11740/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11741/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11742///
11743/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
11744#[inline]
11745#[target_feature(enable = "avx512fp16")]
11746#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11747#[rustc_legacy_const_generics(2)]
11748#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11749pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11750 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
11752}
11753
11754/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11755/// and store the results in dst.
11756///
11757/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
11758#[inline]
11759#[target_feature(enable = "avx512fp16,avx512vl")]
11760#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11761#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11762pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
11763 unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) }
11764}
11765
11766/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11767/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11768/// mask bit is not set).
11769///
11770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
11771#[inline]
11772#[target_feature(enable = "avx512fp16,avx512vl")]
11773#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11774#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11775pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) }
11777}
11778
11779/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11780/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11781///
11782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
11783#[inline]
11784#[target_feature(enable = "avx512fp16,avx512vl")]
11785#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11786#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11787pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a)
11789}
11790
11791/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11792/// and store the results in dst.
11793///
11794/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
11795#[inline]
11796#[target_feature(enable = "avx512fp16,avx512vl")]
11797#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11798#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11799pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
11800 unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) }
11801}
11802
11803/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11804/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11805/// mask bit is not set).
11806///
11807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
11808#[inline]
11809#[target_feature(enable = "avx512fp16,avx512vl")]
11810#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11811#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11812pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) }
11814}
11815
11816/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11817/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11818///
11819/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
11820#[inline]
11821#[target_feature(enable = "avx512fp16,avx512vl")]
11822#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11823#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11824pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
    _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a)
11826}
11827
11828/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11829/// and store the results in dst.
11830///
11831/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
11832#[inline]
11833#[target_feature(enable = "avx512fp16")]
11834#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11835#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11836pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
11837 unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) }
11838}
11839
11840/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11841/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11842/// mask bit is not set).
11843///
11844/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
11845#[inline]
11846#[target_feature(enable = "avx512fp16")]
11847#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11848#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11849pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) }
11851}
11852
11853/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11854/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11855///
11856/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
11857#[inline]
11858#[target_feature(enable = "avx512fp16")]
11859#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11860#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11861pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
    _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a)
11863}
11864
11865/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11866/// and store the results in dst.
11867///
11868/// Rounding is done according to the rounding parameter, which can be one of:
11869///
11870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11875///
11876/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
11877#[inline]
11878#[target_feature(enable = "avx512fp16")]
11879#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11880#[rustc_legacy_const_generics(1)]
11881#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11882pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11883 unsafe {
11884 static_assert_rounding!(ROUNDING);
11885 vcvtuw2ph_512(a.as_u16x32(), ROUNDING)
11886 }
11887}
11888
11889/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11890/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11891/// mask bit is not set).
11892///
11893/// Rounding is done according to the rounding parameter, which can be one of:
11894///
11895/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11896/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11897/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11898/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11899/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11900///
11901/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
11902#[inline]
11903#[target_feature(enable = "avx512fp16")]
11904#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11905#[rustc_legacy_const_generics(3)]
11906#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11907pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
11908 src: __m512h,
11909 k: __mmask32,
11910 a: __m512i,
11911) -> __m512h {
11912 unsafe {
11913 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src)
11915 }
11916}
11917
11918/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11919/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11920///
11921/// Rounding is done according to the rounding parameter, which can be one of:
11922///
11923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11928///
11929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
11930#[inline]
11931#[target_feature(enable = "avx512fp16")]
11932#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11933#[rustc_legacy_const_generics(2)]
11934#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11935pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11936 static_assert_rounding!(ROUNDING);
11937 _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
11938}
11939
11940/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11941/// and store the results in dst. The upper 64 bits of dst are zeroed out.
11942///
11943/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
11944#[inline]
11945#[target_feature(enable = "avx512fp16,avx512vl")]
11946#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11947#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11948pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
11949 _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
11950}
11951
11952/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11953/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11954/// mask bit is not set). The upper 64 bits of dst are zeroed out.
11955///
11956/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
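///
/// # Examples
///
/// A minimal sketch of the writemask behaviour, marked `ignore` because it needs a nightly
/// toolchain with the unstable `stdarch_x86_avx512_f16` feature and a CPU with `avx512fp16`
/// and `avx512vl`; `_mm_set_epi32` and `_mm_set1_ph` are used only to build inputs:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let src = _mm_set1_ph(-1.0);
///     let a = _mm_set_epi32(40, 30, 20, 10); // lanes 3, 2, 1, 0
///     // Mask 0b0101 selects lanes 0 and 2; lanes 1 and 3 are taken from `src`.
///     _mm_mask_cvtepi32_ph(src, 0b0101, a)
///     // lower four f16 lanes: [10.0, -1.0, 30.0, -1.0]; the upper 64 bits are zero
/// }
/// ```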
11957#[inline]
11958#[target_feature(enable = "avx512fp16,avx512vl")]
11959#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11960#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11961pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11962 unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) }
11963}
11964
11965/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11966/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11967/// The upper 64 bits of dst are zeroed out.
11968///
11969/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
11970#[inline]
11971#[target_feature(enable = "avx512fp16,avx512vl")]
11972#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11973#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11974pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
11975 _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
11976}
11977
11978/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11979/// and store the results in dst.
11980///
11981/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
11982#[inline]
11983#[target_feature(enable = "avx512fp16,avx512vl")]
11984#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11985#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11986pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
11987 unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) }
11988}
11989
11990/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11991/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11992/// mask bit is not set).
11993///
11994/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
11995#[inline]
11996#[target_feature(enable = "avx512fp16,avx512vl")]
11997#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11998#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11999pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12000 unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) }
12001}
12002
12003/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12004/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12005///
12006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
12007#[inline]
12008#[target_feature(enable = "avx512fp16,avx512vl")]
12009#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12010#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12011pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
12012 _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
12013}
12014
12015/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12016/// and store the results in dst.
12017///
12018/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
12019#[inline]
12020#[target_feature(enable = "avx512fp16")]
12021#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12022#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12023pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
12024 unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) }
12025}
12026
12027/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12028/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12029/// mask bit is not set).
12030///
12031/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
12032#[inline]
12033#[target_feature(enable = "avx512fp16")]
12034#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12035#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12036pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
12037 unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) }
12038}
12039
12040/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12041/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12042///
12043/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
12044#[inline]
12045#[target_feature(enable = "avx512fp16")]
12046#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12047#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12048pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
12049 _mm512_mask_cvtepi32_ph(_mm256_setzero_ph(), k, a)
12050}
12051
12052/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12053/// and store the results in dst.
12054///
12055/// Rounding is done according to the rounding parameter, which can be one of:
12056///
12057/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12058/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12059/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12060/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12061/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12062///
12063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
12064#[inline]
12065#[target_feature(enable = "avx512fp16")]
12066#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12067#[rustc_legacy_const_generics(1)]
12068#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12069pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12070 unsafe {
12071 static_assert_rounding!(ROUNDING);
12072 vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
12073 }
12074}
12075
12076/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12077/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12078/// mask bit is not set).
12079///
12080/// Rounding is done according to the rounding parameter, which can be one of:
12081///
12082/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12083/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12084/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12085/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12086/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12087///
12088/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
12089#[inline]
12090#[target_feature(enable = "avx512fp16")]
12091#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12092#[rustc_legacy_const_generics(3)]
12093#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12094pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
12095 src: __m256h,
12096 k: __mmask16,
12097 a: __m512i,
12098) -> __m256h {
12099 unsafe {
12100 static_assert_rounding!(ROUNDING);
12101 simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
12102 }
12103}
12104
12105/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12106/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12107///
12108/// Rounding is done according to the rounding parameter, which can be one of:
12109///
12110/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12111/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12112/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12113/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12114/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12115///
12116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
12117#[inline]
12118#[target_feature(enable = "avx512fp16")]
12119#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12120#[rustc_legacy_const_generics(2)]
12121#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12122pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12123 static_assert_rounding!(ROUNDING);
12124 _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12125}
12126
12127/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12128/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12129/// of dst.
12130///
12131/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
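///
/// # Examples
///
/// A minimal sketch, marked `ignore` because it needs a nightly toolchain with the unstable
/// `stdarch_x86_avx512_f16` feature and an `avx512fp16`-capable CPU; `_mm_set1_ph` is used
/// only to build an input:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     // Lane 0 of the result becomes 7.0; lanes 1..=7 are copied from `a` (all 2.0).
///     _mm_cvti32_sh(a, 7)
/// }
/// ```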
12132#[inline]
12133#[target_feature(enable = "avx512fp16")]
12134#[cfg_attr(test, assert_instr(vcvtsi2sh))]
12135#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12136pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
12137 unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12138}
12139
12140/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12141/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12142/// of dst.
12143///
12144/// Rounding is done according to the rounding parameter, which can be one of:
12145///
12146/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12147/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12148/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12149/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12150/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12151///
12152/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
12153#[inline]
12154#[target_feature(enable = "avx512fp16")]
12155#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
12156#[rustc_legacy_const_generics(2)]
12157#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12158pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
12159 unsafe {
12160 static_assert_rounding!(ROUNDING);
12161 vcvtsi2sh(a, b, ROUNDING)
12162 }
12163}
12164
12165/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12166/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12167///
12168/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
12169#[inline]
12170#[target_feature(enable = "avx512fp16,avx512vl")]
12171#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12172#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12173pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
12174 _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
12175}
12176
12177/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12178/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12179/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12180///
12181/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
12182#[inline]
12183#[target_feature(enable = "avx512fp16,avx512vl")]
12184#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12185#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12186pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12187 unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) }
12188}
12189
12190/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12191/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12192/// The upper 64 bits of dst are zeroed out.
12193///
12194/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
12195#[inline]
12196#[target_feature(enable = "avx512fp16,avx512vl")]
12197#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12198#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12199pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
12200 _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12201}
12202
12203/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12204/// and store the results in dst.
12205///
12206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
12207#[inline]
12208#[target_feature(enable = "avx512fp16,avx512vl")]
12209#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12210#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12211pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
12212 unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) }
12213}
12214
12215/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12216/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12217/// mask bit is not set).
12218///
12219/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
12220#[inline]
12221#[target_feature(enable = "avx512fp16,avx512vl")]
12222#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12223#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12224pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12225 unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) }
12226}
12227
12228/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12229/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12230///
12231/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
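///
/// # Examples
///
/// A minimal sketch of the zeromask behaviour, marked `ignore` because it needs a nightly
/// toolchain with the unstable `stdarch_x86_avx512_f16` feature and a CPU with `avx512fp16`
/// and `avx512vl`; `_mm256_set1_epi32` is used only to build an input:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm256_set1_epi32(5);
///     // Only the low four mask bits are set, so lanes 4..=7 of the result are zeroed.
///     _mm256_maskz_cvtepu32_ph(0b0000_1111, a)
///     // result lanes: [5.0, 5.0, 5.0, 5.0, 0.0, 0.0, 0.0, 0.0]
/// }
/// ```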
12232#[inline]
12233#[target_feature(enable = "avx512fp16,avx512vl")]
12234#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12235#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12236pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
12237 _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12238}
12239
12240/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12241/// and store the results in dst.
12242///
12243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
12244#[inline]
12245#[target_feature(enable = "avx512fp16")]
12246#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12247#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12248pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
12249 unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) }
12250}
12251
12252/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12253/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12254/// mask bit is not set).
12255///
12256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
12257#[inline]
12258#[target_feature(enable = "avx512fp16")]
12259#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12260#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12261pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
12262 unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) }
12263}
12264
12265/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12266/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12267///
12268/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
12269#[inline]
12270#[target_feature(enable = "avx512fp16")]
12271#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12272#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12273pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
12274 _mm512_mask_cvtepu32_ph(_mm256_setzero_ph(), k, a)
12275}
12276
12277/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12278/// and store the results in dst.
12279///
12280/// Rounding is done according to the rounding parameter, which can be one of:
12281///
12282/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12283/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12284/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12285/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12286/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12287///
12288/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
12289#[inline]
12290#[target_feature(enable = "avx512fp16")]
12291#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12292#[rustc_legacy_const_generics(1)]
12293#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12294pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12295 unsafe {
12296 static_assert_rounding!(ROUNDING);
12297 vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
12298 }
12299}
12300
12301/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12302/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12303/// mask bit is not set).
12304///
12305/// Rounding is done according to the rounding parameter, which can be one of:
12306///
12307/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12308/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12309/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12310/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12311/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12312///
12313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
12314#[inline]
12315#[target_feature(enable = "avx512fp16")]
12316#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12317#[rustc_legacy_const_generics(3)]
12318#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12319pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
12320 src: __m256h,
12321 k: __mmask16,
12322 a: __m512i,
12323) -> __m256h {
12324 unsafe {
12325 static_assert_rounding!(ROUNDING);
12326 simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
12327 }
12328}
12329
12330/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12331/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12332///
12333/// Rounding is done according to the rounding parameter, which can be one of:
12334///
12335/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12336/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12337/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12338/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12339/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12340///
12341/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
12342#[inline]
12343#[target_feature(enable = "avx512fp16")]
12344#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12345#[rustc_legacy_const_generics(2)]
12346#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12347pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12348 static_assert_rounding!(ROUNDING);
12349 _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12350}
12351
12352/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12353/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12354/// of dst.
12355///
12356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
12357#[inline]
12358#[target_feature(enable = "avx512fp16")]
12359#[cfg_attr(test, assert_instr(vcvtusi2sh))]
12360#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12361pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
12362 unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12363}
12364
12365/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12366/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12367/// of dst.
12368///
12369/// Rounding is done according to the rounding parameter, which can be one of:
12370///
12371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12376///
12377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
12378#[inline]
12379#[target_feature(enable = "avx512fp16")]
12380#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
12381#[rustc_legacy_const_generics(2)]
12382#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12383pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
12384 unsafe {
12385 static_assert_rounding!(ROUNDING);
12386 vcvtusi2sh(a, b, ROUNDING)
12387 }
12388}
12389
12390/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12391/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12392///
12393/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
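///
/// # Examples
///
/// A minimal sketch, marked `ignore` because it needs a nightly toolchain with the unstable
/// `stdarch_x86_avx512_f16` feature and a CPU with `avx512fp16` and `avx512vl`;
/// `_mm_set_epi64x` is used only to build an input:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set_epi64x(-3, 1024);
///     // Only two f16 lanes are produced; the rest of dst is zero:
///     // [1024.0, -3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
///     _mm_cvtepi64_ph(a)
/// }
/// ```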
12394#[inline]
12395#[target_feature(enable = "avx512fp16,avx512vl")]
12396#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12397#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12398pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
12399 _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12400}
12401
12402/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12403/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12404/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12405///
12406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
12407#[inline]
12408#[target_feature(enable = "avx512fp16,avx512vl")]
12409#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12410#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12411pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12412 unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) }
12413}
12414
12415/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12416/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12417/// The upper 96 bits of dst are zeroed out.
12418///
12419/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
12420#[inline]
12421#[target_feature(enable = "avx512fp16,avx512vl")]
12422#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12423#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12424pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
12425 _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12426}
12427
12428/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12429/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12430///
12431/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
12432#[inline]
12433#[target_feature(enable = "avx512fp16,avx512vl")]
12434#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12435#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12436pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
12437 _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12438}
12439
12440/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12441/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12442/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12443///
12444/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
12445#[inline]
12446#[target_feature(enable = "avx512fp16,avx512vl")]
12447#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12448#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12449pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12450 unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) }
12451}
12452
12453/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12454/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12455/// The upper 64 bits of dst are zeroed out.
12456///
12457/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
12458#[inline]
12459#[target_feature(enable = "avx512fp16,avx512vl")]
12460#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12461#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12462pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
12463 _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12464}
12465
12466/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12467/// and store the results in dst.
12468///
12469/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
12470#[inline]
12471#[target_feature(enable = "avx512fp16")]
12472#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12473#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12474pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
12475 unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) }
12476}
12477
12478/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12479/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12480/// mask bit is not set).
12481///
12482/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
12483#[inline]
12484#[target_feature(enable = "avx512fp16")]
12485#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12486#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12487pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
12488 unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) }
12489}
12490
12491/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12492/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12493///
12494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
12495#[inline]
12496#[target_feature(enable = "avx512fp16")]
12497#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12498#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12499pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
12500 _mm512_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12501}
12502
12503/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12504/// and store the results in dst.
12505///
12506/// Rounding is done according to the rounding parameter, which can be one of:
12507///
12508/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12509/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12510/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12511/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12512/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12513///
12514/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
12515#[inline]
12516#[target_feature(enable = "avx512fp16")]
12517#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12518#[rustc_legacy_const_generics(1)]
12519#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12520pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12521 unsafe {
12522 static_assert_rounding!(ROUNDING);
12523 vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
12524 }
12525}
12526
12527/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12528/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12529/// mask bit is not set).
12530///
12531/// Rounding is done according to the rounding parameter, which can be one of:
12532///
12533/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12534/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12535/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12536/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12537/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12538///
12539/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
12540#[inline]
12541#[target_feature(enable = "avx512fp16")]
12542#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12543#[rustc_legacy_const_generics(3)]
12544#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12545pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
12546 src: __m128h,
12547 k: __mmask8,
12548 a: __m512i,
12549) -> __m128h {
12550 unsafe {
12551 static_assert_rounding!(ROUNDING);
12552 simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
12553 }
12554}
12555
12556/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12557/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12558///
12559/// Rounding is done according to the rounding parameter, which can be one of:
12560///
12561/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12562/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12563/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12564/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12565/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12566///
12567/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
12568#[inline]
12569#[target_feature(enable = "avx512fp16")]
12570#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12571#[rustc_legacy_const_generics(2)]
12572#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12573pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12574 static_assert_rounding!(ROUNDING);
12575 _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
12576}
12577
12578/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12579/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12580///
12581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
12582#[inline]
12583#[target_feature(enable = "avx512fp16,avx512vl")]
12584#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12585#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12586pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
12587 _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12588}
12589
12590/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12591/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12592/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12593///
12594/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
12595#[inline]
12596#[target_feature(enable = "avx512fp16,avx512vl")]
12597#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12598#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12599pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12600 unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) }
12601}
12602
12603/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12604/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12605/// The upper 96 bits of dst are zeroed out.
12606///
12607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
12608#[inline]
12609#[target_feature(enable = "avx512fp16,avx512vl")]
12610#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12611#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12612pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
12613 _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12614}
12615
12616/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12617/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12618///
12619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
12620#[inline]
12621#[target_feature(enable = "avx512fp16,avx512vl")]
12622#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12623#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12624pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
12625 _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12626}
12627
12628/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12629/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12630/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12631///
12632/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
12633#[inline]
12634#[target_feature(enable = "avx512fp16,avx512vl")]
12635#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12636#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12637pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12638 unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) }
12639}
12640
12641/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12642/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12643/// The upper 64 bits of dst are zeroed out.
12644///
12645/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
12646#[inline]
12647#[target_feature(enable = "avx512fp16,avx512vl")]
12648#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12649#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12650pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
12651 _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12652}
12653
12654/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12655/// and store the results in dst.
12656///
12657/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
12658#[inline]
12659#[target_feature(enable = "avx512fp16")]
12660#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12661#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12662pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
12663 unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) }
12664}
12665
12666/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12667/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12668/// mask bit is not set).
12669///
12670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
12671#[inline]
12672#[target_feature(enable = "avx512fp16")]
12673#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12675pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
12676 unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) }
12677}
12678
12679/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12680/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12681///
12682/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
12683#[inline]
12684#[target_feature(enable = "avx512fp16")]
12685#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12686#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12687pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
12688 _mm512_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12689}
12690
12691/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12692/// and store the results in dst.
12693///
12694/// Rounding is done according to the rounding parameter, which can be one of:
12695///
12696/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12697/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12698/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12699/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12700/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12701///
12702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
12703#[inline]
12704#[target_feature(enable = "avx512fp16")]
12705#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12706#[rustc_legacy_const_generics(1)]
12707#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12708pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12709 unsafe {
12710 static_assert_rounding!(ROUNDING);
12711 vcvtuqq2ph_512(a.as_u64x8(), ROUNDING)
12712 }
12713}
12714
12715/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12716/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12717/// mask bit is not set).
12718///
12719/// Rounding is done according to the rounding parameter, which can be one of:
12720///
12721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12726///
12727/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
12728#[inline]
12729#[target_feature(enable = "avx512fp16")]
12730#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12731#[rustc_legacy_const_generics(3)]
12732#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12733pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
12734 src: __m128h,
12735 k: __mmask8,
12736 a: __m512i,
12737) -> __m128h {
12738 unsafe {
12739 static_assert_rounding!(ROUNDING);
12740 simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src)
12741 }
12742}
12743
12744/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12745/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12746///
12747/// Rounding is done according to the rounding parameter, which can be one of:
12748///
12749/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12750/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12751/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12752/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12753/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12754///
12755/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
12756#[inline]
12757#[target_feature(enable = "avx512fp16")]
12758#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12759#[rustc_legacy_const_generics(2)]
12760#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12761pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12762 static_assert_rounding!(ROUNDING);
12763 _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
12764}
12765
12766/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12767/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
12768///
12769/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
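///
/// # Examples
///
/// A minimal sketch, marked `ignore` because it needs a nightly toolchain with the unstable
/// `stdarch_x86_avx512_f16` feature and a CPU with `avx512fp16` and `avx512vl`; `_mm_set_ps`
/// is used only to build an input:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// unsafe fn demo() -> __m128h {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     // The lower four f16 lanes become [1.0, 2.0, 3.0, 4.0]; the upper 64 bits are zero.
///     _mm_cvtxps_ph(a)
/// }
/// ```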
12770#[inline]
12771#[target_feature(enable = "avx512fp16,avx512vl")]
12772#[cfg_attr(test, assert_instr(vcvtps2phx))]
12773#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12774pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
12775 _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12776}
12777
12778/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12779/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12780/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12781///
12782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
12783#[inline]
12784#[target_feature(enable = "avx512fp16,avx512vl")]
12785#[cfg_attr(test, assert_instr(vcvtps2phx))]
12786#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12787pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
12788 unsafe { vcvtps2phx_128(a, src, k) }
12789}
12790
12791/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12792/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12793/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12794///
12795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
12796#[inline]
12797#[target_feature(enable = "avx512fp16,avx512vl")]
12798#[cfg_attr(test, assert_instr(vcvtps2phx))]
12799#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12800pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
12801 _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
12802}
12803
12804/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12805/// floating-point elements, and store the results in dst.
12806///
12807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
12808#[inline]
12809#[target_feature(enable = "avx512fp16,avx512vl")]
12810#[cfg_attr(test, assert_instr(vcvtps2phx))]
12811#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12812pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
12813 _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12814}
12815
12816/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12817/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12818/// when the corresponding mask bit is not set).
12819///
12820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
12821#[inline]
12822#[target_feature(enable = "avx512fp16,avx512vl")]
12823#[cfg_attr(test, assert_instr(vcvtps2phx))]
12824#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12825pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
12826 unsafe { vcvtps2phx_256(a, src, k) }
12827}
12828
12829/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12830/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12831/// corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
12834#[inline]
12835#[target_feature(enable = "avx512fp16,avx512vl")]
12836#[cfg_attr(test, assert_instr(vcvtps2phx))]
12837#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12838pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
12839 _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
12840}
12841
12842/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12843/// floating-point elements, and store the results in dst.
12844///
12845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
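///
/// # Examples
///
/// A minimal sketch, marked `ignore` because it needs a nightly toolchain with the unstable
/// `stdarch_x86_avx512_f16` feature and an `avx512fp16`-capable CPU; `_mm512_set1_ps` in the
/// trailing comment is only an illustrative way to build input:
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// unsafe fn narrow(a: __m512) -> __m256h {
///     // Sixteen f32 lanes are narrowed to sixteen f16 lanes.
///     _mm512_cvtxps_ph(a)
/// }
///
/// // e.g. `narrow(_mm512_set1_ps(1.5))` gives 16 lanes of 1.5, which is exactly
/// // representable in half precision.
/// ```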
12846#[inline]
12847#[target_feature(enable = "avx512fp16")]
12848#[cfg_attr(test, assert_instr(vcvtps2phx))]
12849#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12850pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
12851 _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), 0xffff, a)
12852}
12853
12854/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12855/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12856/// when the corresponding mask bit is not set).
12857///
12858/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
12859#[inline]
12860#[target_feature(enable = "avx512fp16")]
12861#[cfg_attr(test, assert_instr(vcvtps2phx))]
12862#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12863pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
12864 unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
12865}
12866
12867/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12868/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12869/// corresponding mask bit is not set).
12870///
12871/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
12872#[inline]
12873#[target_feature(enable = "avx512fp16")]
12874#[cfg_attr(test, assert_instr(vcvtps2phx))]
12875#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12876pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
12877 _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), k, a)
12878}
12879
12880/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12881/// floating-point elements, and store the results in dst.
12882///
12883/// Rounding is done according to the rounding parameter, which can be one of:
12884///
12885/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12886/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12887/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12888/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12889/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12890///
12891/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
12892#[inline]
12893#[target_feature(enable = "avx512fp16")]
12894#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
12895#[rustc_legacy_const_generics(1)]
12896#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12897pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
12898 static_assert_rounding!(ROUNDING);
12899 _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), 0xffff, a)
12900}
12901
12902/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12903/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12904/// when the corresponding mask bit is not set).
12905///
12906/// Rounding is done according to the rounding parameter, which can be one of:
12907///
12908/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12909/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12910/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12911/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12912/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12913///
12914/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
12915#[inline]
12916#[target_feature(enable = "avx512fp16")]
12917#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
12918#[rustc_legacy_const_generics(3)]
12919#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12920pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
12921 src: __m256h,
12922 k: __mmask16,
12923 a: __m512,
12924) -> __m256h {
12925 unsafe {
12926 static_assert_rounding!(ROUNDING);
12927 vcvtps2phx_512(a, src, k, ROUNDING)
12928 }
12929}
12930
12931/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12932/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12933/// corresponding mask bit is not set).
12934///
12935/// Rounding is done according to the rounding parameter, which can be one of:
12936///
12937/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12938/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12939/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12940/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12942///
12943/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
12944#[inline]
12945#[target_feature(enable = "avx512fp16")]
12946#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
12947#[rustc_legacy_const_generics(2)]
12948#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12949pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
12950 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(_mm256_setzero_ph(), k, a)
12952}
12953
12954/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
12956/// elements from a to the upper elements of dst.
12957///
12958/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
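///
/// An illustrative sketch (not part of the original source), assuming AVX512-FP16 support and the
/// unstable `stdarch_x86_avx512_f16` feature:
///
/// ```ignore
/// let a = _mm_set1_ph(2.0);
/// let b = _mm_set_ss(1.25);
/// // The lowest f16 lane of `r` holds 1.25; the upper 7 lanes are copied from `a`.
/// let r = _mm_cvtss_sh(a, b);
/// ```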
12959#[inline]
12960#[target_feature(enable = "avx512fp16")]
12961#[cfg_attr(test, assert_instr(vcvtss2sh))]
12962#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12963pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
    _mm_mask_cvtss_sh(_mm_undefined_ph(), 0xff, a, b)
12965}
12966
12967/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
12970/// upper elements of dst.
12971///
12972/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
12973#[inline]
12974#[target_feature(enable = "avx512fp16")]
12975#[cfg_attr(test, assert_instr(vcvtss2sh))]
12976#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12977pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
12978 unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
12979}
12980
12981/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
12983/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
12984/// elements of dst.
12985///
12986/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
12987#[inline]
12988#[target_feature(enable = "avx512fp16")]
12989#[cfg_attr(test, assert_instr(vcvtss2sh))]
12990#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12991pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
    _mm_mask_cvtss_sh(_mm_setzero_ph(), k, a, b)
12993}
12994
12995/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
12997/// elements from a to the upper elements of dst.
12998///
12999/// Rounding is done according to the rounding parameter, which can be one of:
13000///
13001/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13002/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13003/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13004/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13005/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13006///
13007/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
13008#[inline]
13009#[target_feature(enable = "avx512fp16")]
13010#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13011#[rustc_legacy_const_generics(2)]
13012#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13013pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
13014 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
13016}
13017
13018/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13021/// upper elements of dst.
13022///
13023/// Rounding is done according to the rounding parameter, which can be one of:
13024///
13025/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13026/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13027/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13028/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13029/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13030///
13031/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
13032#[inline]
13033#[target_feature(enable = "avx512fp16")]
13034#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13035#[rustc_legacy_const_generics(4)]
13036#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13037pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
13038 src: __m128h,
13039 k: __mmask8,
13040 a: __m128h,
13041 b: __m128,
13042) -> __m128h {
13043 unsafe {
13044 static_assert_rounding!(ROUNDING);
13045 vcvtss2sh(a, b, src, k, ROUNDING)
13046 }
13047}
13048
13049/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13051/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13052/// elements of dst.
13053///
13054/// Rounding is done according to the rounding parameter, which can be one of:
13055///
13056/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13057/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13058/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13059/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13060/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13061///
13062/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
13063#[inline]
13064#[target_feature(enable = "avx512fp16")]
13065#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13066#[rustc_legacy_const_generics(3)]
13067#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13068pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
13069 k: __mmask8,
13070 a: __m128h,
13071 b: __m128,
13072) -> __m128h {
13073 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundss_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
13075}
13076
13077/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13078/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out.
13079///
13080/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
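///
/// An illustrative sketch (not part of the original source), assuming AVX512-FP16 and AVX512VL support:
///
/// ```ignore
/// let a = _mm_set_pd(2.0, 1.0);
/// // The two lowest f16 lanes of `r` hold 1.0 and 2.0; the upper 96 bits are zero.
/// let r = _mm_cvtpd_ph(a);
/// ```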
13081#[inline]
13082#[target_feature(enable = "avx512fp16,avx512vl")]
13083#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13084#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13085pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13087}
13088
13089/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13090/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13091/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13092///
13093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
13094#[inline]
13095#[target_feature(enable = "avx512fp16,avx512vl")]
13096#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13097#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13098pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
13099 unsafe { vcvtpd2ph_128(a, src, k) }
13100}
13101
13102/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13103/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13104/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13105///
13106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
13107#[inline]
13108#[target_feature(enable = "avx512fp16,avx512vl")]
13109#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13110#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13111pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13113}
13114
13115/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13116/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
13117///
13118/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
13119#[inline]
13120#[target_feature(enable = "avx512fp16,avx512vl")]
13121#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13122#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13123pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13125}
13126
13127/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13128/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13129/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13130///
13131/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
13132#[inline]
13133#[target_feature(enable = "avx512fp16,avx512vl")]
13134#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13135#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13136pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
13137 unsafe { vcvtpd2ph_256(a, src, k) }
13138}
13139
13140/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13141/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13142/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13143///
13144/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
13145#[inline]
13146#[target_feature(enable = "avx512fp16,avx512vl")]
13147#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13148#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13149pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13151}
13152
13153/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13154/// floating-point elements, and store the results in dst.
13155///
13156/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
13157#[inline]
13158#[target_feature(enable = "avx512fp16")]
13159#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13160#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13161pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
    _mm512_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13163}
13164
13165/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13166/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13167/// when the corresponding mask bit is not set).
13168///
13169/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
13170#[inline]
13171#[target_feature(enable = "avx512fp16")]
13172#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13173#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13174pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
13175 unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
13176}
13177
13178/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13179/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13180/// corresponding mask bit is not set).
13181///
13182/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
13183#[inline]
13184#[target_feature(enable = "avx512fp16")]
13185#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13186#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13187pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
    _mm512_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13189}
13190
13191/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13192/// floating-point elements, and store the results in dst.
13193///
13194/// Rounding is done according to the rounding parameter, which can be one of:
13195///
13196/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13197/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13198/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13199/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13200/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13201///
13202/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
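///
/// A hedged usage sketch (not from the original source), assuming AVX512-FP16 hardware and the
/// unstable `stdarch_x86_avx512_f16` feature:
///
/// ```ignore
/// let a = _mm512_set1_pd(0.1);
/// // Truncate toward zero and suppress exceptions while narrowing f64 -> f16.
/// let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
/// ```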
13203#[inline]
13204#[target_feature(enable = "avx512fp16")]
13205#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13206#[rustc_legacy_const_generics(1)]
13207#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13208pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
13209 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), 0xff, a)
13211}
13212
13213/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13214/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13215/// when the corresponding mask bit is not set).
13216///
13217/// Rounding is done according to the rounding parameter, which can be one of:
13218///
13219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13224///
13225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
13226#[inline]
13227#[target_feature(enable = "avx512fp16")]
13228#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13229#[rustc_legacy_const_generics(3)]
13230#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13231pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
13232 src: __m128h,
13233 k: __mmask8,
13234 a: __m512d,
13235) -> __m128h {
13236 unsafe {
13237 static_assert_rounding!(ROUNDING);
13238 vcvtpd2ph_512(a, src, k, ROUNDING)
13239 }
13240}
13241
13242/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13243/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13244/// corresponding mask bit is not set).
13245///
13246/// Rounding is done according to the rounding parameter, which can be one of:
13247///
13248/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13249/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13250/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13251/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13252/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13253///
13254/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
13255#[inline]
13256#[target_feature(enable = "avx512fp16")]
13257#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13258#[rustc_legacy_const_generics(2)]
13259#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13260pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
13261 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(_mm_setzero_ph(), k, a)
13263}
13264
13265/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13267/// elements from a to the upper elements of dst.
13268///
13269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
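///
/// An illustrative sketch (not part of the original source), assuming AVX512-FP16 support:
///
/// ```ignore
/// let a = _mm_set1_ph(0.0);
/// let b = _mm_set_sd(2.5);
/// // The lowest f16 lane of `r` holds 2.5; the other 7 lanes are copied from `a`.
/// let r = _mm_cvtsd_sh(a, b);
/// ```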
13270#[inline]
13271#[target_feature(enable = "avx512fp16")]
13272#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13273#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13274pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
    _mm_mask_cvtsd_sh(_mm_undefined_ph(), 0xff, a, b)
13276}
13277
13278/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13281/// upper elements of dst.
13282///
13283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
13284#[inline]
13285#[target_feature(enable = "avx512fp16")]
13286#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13287#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13288pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
13289 unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
13290}
13291
13292/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13294/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13295/// elements of dst.
13296///
13297/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
13298#[inline]
13299#[target_feature(enable = "avx512fp16")]
13300#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13301#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13302pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
    _mm_mask_cvtsd_sh(_mm_setzero_ph(), k, a, b)
13304}
13305
13306/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13308/// elements from a to the upper elements of dst.
13309///
13310/// Rounding is done according to the rounding parameter, which can be one of:
13311///
13312/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13313/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13314/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13315/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13316/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13317///
13318/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
13319#[inline]
13320#[target_feature(enable = "avx512fp16")]
13321#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13322#[rustc_legacy_const_generics(2)]
13323#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13324pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
13325 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_undefined_ph(), 0xff, a, b)
13327}
13328
13329/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13332/// upper elements of dst.
13333///
13334/// Rounding is done according to the rounding parameter, which can be one of:
13335///
13336/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13337/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13338/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13339/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13340/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13341///
13342/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
13343#[inline]
13344#[target_feature(enable = "avx512fp16")]
13345#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13346#[rustc_legacy_const_generics(4)]
13347#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13348pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
13349 src: __m128h,
13350 k: __mmask8,
13351 a: __m128h,
13352 b: __m128d,
13353) -> __m128h {
13354 unsafe {
13355 static_assert_rounding!(ROUNDING);
13356 vcvtsd2sh(a, b, src, k, ROUNDING)
13357 }
13358}
13359
13360/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13362/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13363/// elements of dst.
13364///
13365/// Rounding is done according to the rounding parameter, which can be one of:
13366///
13367/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13368/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13369/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13370/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13371/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13372///
13373/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
13374#[inline]
13375#[target_feature(enable = "avx512fp16")]
13376#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13377#[rustc_legacy_const_generics(3)]
13378#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13379pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
13380 k: __mmask8,
13381 a: __m128h,
13382 b: __m128d,
13383) -> __m128h {
13384 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
13386}
13387
13388/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13389/// store the results in dst.
13390///
13391/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
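///
/// An illustrative sketch (not part of the original source), assuming AVX512-FP16 and AVX512VL support:
///
/// ```ignore
/// let a = _mm_set1_ph(-1.5);
/// // Uses the current MXCSR rounding mode (round-to-nearest-even by default),
/// // so every signed 16-bit lane of `r` holds -2.
/// let r = _mm_cvtph_epi16(a);
/// ```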
13392#[inline]
13393#[target_feature(enable = "avx512fp16,avx512vl")]
13394#[cfg_attr(test, assert_instr(vcvtph2w))]
13395#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13396pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
13398}
13399
13400/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13401/// store the results in dst using writemask k (elements are copied from src when the corresponding
13402/// mask bit is not set).
13403///
13404/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
13405#[inline]
13406#[target_feature(enable = "avx512fp16,avx512vl")]
13407#[cfg_attr(test, assert_instr(vcvtph2w))]
13408#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13409pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) }
13411}
13412
13413/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13414/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13415///
13416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
13417#[inline]
13418#[target_feature(enable = "avx512fp16,avx512vl")]
13419#[cfg_attr(test, assert_instr(vcvtph2w))]
13420#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13421pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
13423}
13424
13425/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13426/// store the results in dst.
13427///
13428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
13429#[inline]
13430#[target_feature(enable = "avx512fp16,avx512vl")]
13431#[cfg_attr(test, assert_instr(vcvtph2w))]
13432#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13433pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
13435}
13436
13437/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13438/// store the results in dst using writemask k (elements are copied from src when the corresponding
13439/// mask bit is not set).
13440///
13441/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
13442#[inline]
13443#[target_feature(enable = "avx512fp16,avx512vl")]
13444#[cfg_attr(test, assert_instr(vcvtph2w))]
13445#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13446pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) }
13448}
13449
13450/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13451/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13452///
13453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
13454#[inline]
13455#[target_feature(enable = "avx512fp16,avx512vl")]
13456#[cfg_attr(test, assert_instr(vcvtph2w))]
13457#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13458pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
13460}
13461
13462/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13463/// store the results in dst.
13464///
13465/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
13466#[inline]
13467#[target_feature(enable = "avx512fp16")]
13468#[cfg_attr(test, assert_instr(vcvtph2w))]
13469#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13470pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13472}
13473
13474/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13475/// store the results in dst using writemask k (elements are copied from src when the corresponding
13476/// mask bit is not set).
13477///
13478/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
13479#[inline]
13480#[target_feature(enable = "avx512fp16")]
13481#[cfg_attr(test, assert_instr(vcvtph2w))]
13482#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13483pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13484 unsafe {
        transmute(vcvtph2w_512(
13486 a,
13487 src.as_i16x32(),
13488 k,
13489 _MM_FROUND_CUR_DIRECTION,
13490 ))
13491 }
13492}
13493
13494/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13495/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13496///
13497/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
13498#[inline]
13499#[target_feature(enable = "avx512fp16")]
13500#[cfg_attr(test, assert_instr(vcvtph2w))]
13501#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13502pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
13504}
13505
13506/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13507/// store the results in dst.
13508///
13509/// Rounding is done according to the rounding parameter, which can be one of:
13510///
13511/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13512/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13513/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13514/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13515/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13516///
13517/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
13518#[inline]
13519#[target_feature(enable = "avx512fp16")]
13520#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13521#[rustc_legacy_const_generics(1)]
13522#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13523pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
13524 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
13526}
13527
13528/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13529/// store the results in dst using writemask k (elements are copied from src when the corresponding
13530/// mask bit is not set).
13531///
13532/// Rounding is done according to the rounding parameter, which can be one of:
13533///
13534/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13535/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13536/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13537/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13538/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13539///
13540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
13541#[inline]
13542#[target_feature(enable = "avx512fp16")]
13543#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13544#[rustc_legacy_const_generics(3)]
13545#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13546pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
13547 src: __m512i,
13548 k: __mmask32,
13549 a: __m512h,
13550) -> __m512i {
13551 unsafe {
13552 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
13554 }
13555}
13556
13557/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13558/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13559///
13560/// Rounding is done according to the rounding parameter, which can be one of:
13561///
13562/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13563/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13564/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13565/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13566/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13567///
13568/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
13569#[inline]
13570#[target_feature(enable = "avx512fp16")]
13571#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13572#[rustc_legacy_const_generics(2)]
13573#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13574pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
13575 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
13577}
13578
13579/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13580/// and store the results in dst.
13581///
13582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
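///
/// An illustrative sketch (not part of the original source), assuming AVX512-FP16 and AVX512VL support:
///
/// ```ignore
/// let a = _mm_set1_ph(7.5);
/// // With the default round-to-nearest-even mode every unsigned 16-bit lane of `r` holds 8.
/// let r = _mm_cvtph_epu16(a);
/// ```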
13583#[inline]
13584#[target_feature(enable = "avx512fp16,avx512vl")]
13585#[cfg_attr(test, assert_instr(vcvtph2uw))]
13586#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13587pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
13589}
13590
13591/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13592/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13593/// mask bit is not set).
13594///
13595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
13596#[inline]
13597#[target_feature(enable = "avx512fp16,avx512vl")]
13598#[cfg_attr(test, assert_instr(vcvtph2uw))]
13599#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13600pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) }
13602}
13603
13604/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13605/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13606///
13607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
13608#[inline]
13609#[target_feature(enable = "avx512fp16,avx512vl")]
13610#[cfg_attr(test, assert_instr(vcvtph2uw))]
13611#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13612pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
13614}
13615
13616/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13617/// and store the results in dst.
13618///
13619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
13620#[inline]
13621#[target_feature(enable = "avx512fp16,avx512vl")]
13622#[cfg_attr(test, assert_instr(vcvtph2uw))]
13623#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13624pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
13626}
13627
13628/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13629/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13630/// mask bit is not set).
13631///
13632/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
13633#[inline]
13634#[target_feature(enable = "avx512fp16,avx512vl")]
13635#[cfg_attr(test, assert_instr(vcvtph2uw))]
13636#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13637pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) }
13639}
13640
13641/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13642/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
13645#[inline]
13646#[target_feature(enable = "avx512fp16,avx512vl")]
13647#[cfg_attr(test, assert_instr(vcvtph2uw))]
13648#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13649pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
13651}
13652
13653/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13654/// and store the results in dst.
13655///
13656/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
13657#[inline]
13658#[target_feature(enable = "avx512fp16")]
13659#[cfg_attr(test, assert_instr(vcvtph2uw))]
13660#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13661pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
13663}
13664
13665/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13666/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13667/// mask bit is not set).
13668///
13669/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
13670#[inline]
13671#[target_feature(enable = "avx512fp16")]
13672#[cfg_attr(test, assert_instr(vcvtph2uw))]
13673#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13674pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13675 unsafe {
        transmute(vcvtph2uw_512(
13677 a,
13678 src.as_u16x32(),
13679 k,
13680 _MM_FROUND_CUR_DIRECTION,
13681 ))
13682 }
13683}
13684
13685/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13686/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
13689#[inline]
13690#[target_feature(enable = "avx512fp16")]
13691#[cfg_attr(test, assert_instr(vcvtph2uw))]
13692#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13693pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
13695}
13696
13697/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13698/// and store the results in dst.
13699///
13700/// Rounding is done according to the rounding parameter, which can be one of:
13701///
13702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13707///
13708/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
13709#[inline]
13710#[target_feature(enable = "avx512fp16")]
13711#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
13712#[rustc_legacy_const_generics(1)]
13713#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13714pub fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i {
13715 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
13717}
13718
13719/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13720/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13721/// mask bit is not set).
13722///
13723/// Rounding is done according to the rounding parameter, which can be one of:
13724///
13725/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13726/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13727/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13728/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13729/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13730///
13731/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
13732#[inline]
13733#[target_feature(enable = "avx512fp16")]
13734#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
13735#[rustc_legacy_const_generics(3)]
13736#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13737pub fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>(
13738 src: __m512i,
13739 k: __mmask32,
13740 a: __m512h,
13741) -> __m512i {
13742 unsafe {
13743 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING))
13745 }
13746}
13747
13748/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13749/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13750///
13751/// Rounding is done according to the rounding parameter, which can be one of:
13752///
13753/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13754/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13755/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13756/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13757/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13758///
13759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
13760#[inline]
13761#[target_feature(enable = "avx512fp16")]
13762#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
13763#[rustc_legacy_const_generics(2)]
13764#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13765pub fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
13766 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a)
13768}
13769
13770/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13771/// truncation, and store the results in dst.
13772///
13773/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
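///
/// An illustrative sketch (not part of the original source), assuming AVX512-FP16 and AVX512VL support:
///
/// ```ignore
/// let a = _mm_set1_ph(-1.9);
/// // Truncation always rounds toward zero, so every signed 16-bit lane of `r` holds -1.
/// let r = _mm_cvttph_epi16(a);
/// ```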
13774#[inline]
13775#[target_feature(enable = "avx512fp16,avx512vl")]
13776#[cfg_attr(test, assert_instr(vcvttph2w))]
13777#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13778pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
13780}
13781
13782/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13783/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13784/// mask bit is not set).
13785///
13786/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
13787#[inline]
13788#[target_feature(enable = "avx512fp16,avx512vl")]
13789#[cfg_attr(test, assert_instr(vcvttph2w))]
13790#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13791pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) }
13793}
13794
13795/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13796/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13797/// mask bit is not set).
13798///
13799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
13800#[inline]
13801#[target_feature(enable = "avx512fp16,avx512vl")]
13802#[cfg_attr(test, assert_instr(vcvttph2w))]
13803#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13804pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
13806}
13807
13808/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13809/// truncation, and store the results in dst.
13810///
13811/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
13812#[inline]
13813#[target_feature(enable = "avx512fp16,avx512vl")]
13814#[cfg_attr(test, assert_instr(vcvttph2w))]
13815#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13816pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
13818}
13819
13820/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13821/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13822/// mask bit is not set).
13823///
13824/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
13825#[inline]
13826#[target_feature(enable = "avx512fp16,avx512vl")]
13827#[cfg_attr(test, assert_instr(vcvttph2w))]
13828#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13829pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) }
13831}
13832
13833/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13834/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13835/// mask bit is not set).
13836///
13837/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
13838#[inline]
13839#[target_feature(enable = "avx512fp16,avx512vl")]
13840#[cfg_attr(test, assert_instr(vcvttph2w))]
13841#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13842pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
13844}
13845
13846/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13847/// truncation, and store the results in dst.
13848///
13849/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
13850#[inline]
13851#[target_feature(enable = "avx512fp16")]
13852#[cfg_attr(test, assert_instr(vcvttph2w))]
13853#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13854pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13856}
13857
13858/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13859/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13860/// mask bit is not set).
13861///
13862/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
13863#[inline]
13864#[target_feature(enable = "avx512fp16")]
13865#[cfg_attr(test, assert_instr(vcvttph2w))]
13866#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13867pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13868 unsafe {
        transmute(vcvttph2w_512(
13870 a,
13871 src.as_i16x32(),
13872 k,
13873 _MM_FROUND_CUR_DIRECTION,
13874 ))
13875 }
13876}
13877
13878/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13879/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13880/// mask bit is not set).
13881///
13882/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
13883#[inline]
13884#[target_feature(enable = "avx512fp16")]
13885#[cfg_attr(test, assert_instr(vcvttph2w))]
13886#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13887pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
13889}
13890
13891/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13892/// truncation, and store the results in dst.
13893///
13894/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13895///
13896/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
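///
/// A hedged usage sketch (not from the original source), assuming AVX512-FP16 hardware and the
/// unstable `stdarch_x86_avx512_f16` feature:
///
/// ```ignore
/// let a = _mm512_set1_ph(3.75);
/// // Truncate toward zero while suppressing floating-point exceptions.
/// let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
/// ```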
13897#[inline]
13898#[target_feature(enable = "avx512fp16")]
13899#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13900#[rustc_legacy_const_generics(1)]
13901#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13902pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
13903 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
13905}
13906
13907/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13908/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13909/// mask bit is not set).
13910///
13911/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13912///
13913/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
13914#[inline]
13915#[target_feature(enable = "avx512fp16")]
13916#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13917#[rustc_legacy_const_generics(3)]
13918#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13919pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
13920 src: __m512i,
13921 k: __mmask32,
13922 a: __m512h,
13923) -> __m512i {
13924 unsafe {
13925 static_assert_sae!(SAE);
        transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
13927 }
13928}
13929
13930/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13931/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13932/// mask bit is not set).
13933///
13934/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13935///
13936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
13937#[inline]
13938#[target_feature(enable = "avx512fp16")]
13939#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13940#[rustc_legacy_const_generics(2)]
13941#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13942pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
13943 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
13945}
13946
13947/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13948/// truncation, and store the results in dst.
13949///
13950/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
13951#[inline]
13952#[target_feature(enable = "avx512fp16,avx512vl")]
13953#[cfg_attr(test, assert_instr(vcvttph2uw))]
13954#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13955pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
13957}
13958
13959/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13960/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13961/// mask bit is not set).
13962///
13963/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
13964#[inline]
13965#[target_feature(enable = "avx512fp16,avx512vl")]
13966#[cfg_attr(test, assert_instr(vcvttph2uw))]
13967#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13968pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13969 unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) }
13970}
13971
13972/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13973/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13974/// mask bit is not set).
13975///
13976/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
13977#[inline]
13978#[target_feature(enable = "avx512fp16,avx512vl")]
13979#[cfg_attr(test, assert_instr(vcvttph2uw))]
13980#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13981pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
13982 _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
13983}
13984
13985/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13986/// truncation, and store the results in dst.
13987///
13988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
13989#[inline]
13990#[target_feature(enable = "avx512fp16,avx512vl")]
13991#[cfg_attr(test, assert_instr(vcvttph2uw))]
13992#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13993pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
13994 _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
13995}
13996
13997/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13998/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13999/// mask bit is not set).
14000///
14001/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
14002#[inline]
14003#[target_feature(enable = "avx512fp16,avx512vl")]
14004#[cfg_attr(test, assert_instr(vcvttph2uw))]
14005#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14006pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
14007 unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) }
14008}
14009
14010/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14011/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14012/// mask bit is not set).
14013///
14014/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
14015#[inline]
14016#[target_feature(enable = "avx512fp16,avx512vl")]
14017#[cfg_attr(test, assert_instr(vcvttph2uw))]
14018#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14019pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
14020 _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
14021}
14022
14023/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14024/// truncation, and store the results in dst.
14025///
14026/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
14027#[inline]
14028#[target_feature(enable = "avx512fp16")]
14029#[cfg_attr(test, assert_instr(vcvttph2uw))]
14030#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14031pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
14032 _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
14033}
14034
14035/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14036/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14037/// mask bit is not set).
14038///
14039/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
14040#[inline]
14041#[target_feature(enable = "avx512fp16")]
14042#[cfg_attr(test, assert_instr(vcvttph2uw))]
14043#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14044pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
14045 unsafe {
14046 transmute(vcvttph2uw_512(
14047 a,
14048 src.as_u16x32(),
14049 k,
14050 _MM_FROUND_CUR_DIRECTION,
14051 ))
14052 }
14053}
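
// Illustrative usage sketch (not part of the original crate): writemask behavior,
// assuming the `_mm512_set1_ph` helper from this module and the default MXCSR state.
//
//     let src = _mm512_setzero_si512();
//     let a = _mm512_set1_ph(3.9);
//     // Lanes whose bit in `k` is set are converted (truncated to 3);
//     // lanes whose bit is clear are copied from `src` (here 0).
//     let r = _mm512_mask_cvttph_epu16(src, 0x5555_5555, a);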
14054
14055/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14056/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14057/// mask bit is not set).
14058///
14059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
14060#[inline]
14061#[target_feature(enable = "avx512fp16")]
14062#[cfg_attr(test, assert_instr(vcvttph2uw))]
14063#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14064pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
14065 _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
14066}
14067
14068/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14069/// truncation, and store the results in dst.
14070///
14071/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14072///
14073/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
14074#[inline]
14075#[target_feature(enable = "avx512fp16")]
14076#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14077#[rustc_legacy_const_generics(1)]
14078#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14079pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
14080 static_assert_sae!(SAE);
14081 _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
14082}
14083
14084/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14085/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14086/// mask bit is not set).
14087///
14088/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14089///
14090/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
14091#[inline]
14092#[target_feature(enable = "avx512fp16")]
14093#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14094#[rustc_legacy_const_generics(3)]
14095#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14096pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
14097 src: __m512i,
14098 k: __mmask32,
14099 a: __m512h,
14100) -> __m512i {
14101 unsafe {
14102 static_assert_sae!(SAE);
14103 transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
14104 }
14105}
14106
14107/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14108/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14109/// mask bit is not set).
14110///
14111/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14112///
14113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
14114#[inline]
14115#[target_feature(enable = "avx512fp16")]
14116#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14117#[rustc_legacy_const_generics(2)]
14118#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14119pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14120 static_assert_sae!(SAE);
14121 _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
14122}
14123
14124/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14125/// results in dst.
14126///
14127/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
14128#[inline]
14129#[target_feature(enable = "avx512fp16,avx512vl")]
14130#[cfg_attr(test, assert_instr(vcvtph2dq))]
14131#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14132pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
14133 _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
14134}
14135
14136/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14137/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
14140#[inline]
14141#[target_feature(enable = "avx512fp16,avx512vl")]
14142#[cfg_attr(test, assert_instr(vcvtph2dq))]
14143#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14144pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14145 unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) }
14146}
14147
14148/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14149/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14150///
14151/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
14152#[inline]
14153#[target_feature(enable = "avx512fp16,avx512vl")]
14154#[cfg_attr(test, assert_instr(vcvtph2dq))]
14155#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14156pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
14157 _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
14158}
14159
14160/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14161/// results in dst.
14162///
14163/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
14164#[inline]
14165#[target_feature(enable = "avx512fp16,avx512vl")]
14166#[cfg_attr(test, assert_instr(vcvtph2dq))]
14167#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14168pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
14169 _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
14170}
14171
14172/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14173/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14174///
14175/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
14176#[inline]
14177#[target_feature(enable = "avx512fp16,avx512vl")]
14178#[cfg_attr(test, assert_instr(vcvtph2dq))]
14179#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14180pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14181 unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) }
14182}
14183
14184/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14185/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14186///
14187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
14188#[inline]
14189#[target_feature(enable = "avx512fp16,avx512vl")]
14190#[cfg_attr(test, assert_instr(vcvtph2dq))]
14191#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14192pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
14193 _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
14194}
14195
14196/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14197/// results in dst.
14198///
14199/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
14200#[inline]
14201#[target_feature(enable = "avx512fp16")]
14202#[cfg_attr(test, assert_instr(vcvtph2dq))]
14203#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14204pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
14205 _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
14206}
14207
14208/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14209/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14210///
14211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
14212#[inline]
14213#[target_feature(enable = "avx512fp16")]
14214#[cfg_attr(test, assert_instr(vcvtph2dq))]
14215#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14216pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14217 unsafe {
14218 transmute(vcvtph2dq_512(
14219 a,
14220 src.as_i32x16(),
14221 k,
14222 _MM_FROUND_CUR_DIRECTION,
14223 ))
14224 }
14225}
14226
14227/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14228/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14229///
14230/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
14231#[inline]
14232#[target_feature(enable = "avx512fp16")]
14233#[cfg_attr(test, assert_instr(vcvtph2dq))]
14234#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14235pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
14236 _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
14237}
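
// Illustrative usage sketch (not part of the original crate): zeromask behavior,
// assuming the `_mm256_set1_ph` helper from this module and default MXCSR rounding.
//
//     let a = _mm256_set1_ph(-7.3);
//     // The low 8 mask bits are set, so lanes 0..8 hold -7 (round to nearest);
//     // lanes 8..16 are zeroed because their mask bits are clear.
//     let r = _mm512_maskz_cvtph_epi32(0x00ff, a);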
14238
14239/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14240/// results in dst.
14241///
14242/// Rounding is done according to the rounding parameter, which can be one of:
14243///
14244/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14245/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14246/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14247/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14248/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14249///
14250/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
14251#[inline]
14252#[target_feature(enable = "avx512fp16")]
14253#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14254#[rustc_legacy_const_generics(1)]
14255#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14256pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14257 static_assert_rounding!(ROUNDING);
14258 _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
14259}
14260
14261/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14262/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14263///
14264/// Rounding is done according to the rounding parameter, which can be one of:
14265///
14266/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14267/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14268/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14269/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14270/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14271///
14272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
14273#[inline]
14274#[target_feature(enable = "avx512fp16")]
14275#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14276#[rustc_legacy_const_generics(3)]
14277#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14278pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
14279 src: __m512i,
14280 k: __mmask16,
14281 a: __m256h,
14282) -> __m512i {
14283 unsafe {
14284 static_assert_rounding!(ROUNDING);
14285 transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
14286 }
14287}
14288
14289/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14290/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14291///
14292/// Rounding is done according to the rounding parameter, which can be one of:
14293///
14294/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14295/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14296/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14297/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14299///
14300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
14301#[inline]
14302#[target_feature(enable = "avx512fp16")]
14303#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14304#[rustc_legacy_const_generics(2)]
14305#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14306pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14307 static_assert_rounding!(ROUNDING);
14308 _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
14309}
14310
14311/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14312/// the result in dst.
14313///
14314/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
14315#[inline]
14316#[target_feature(enable = "avx512fp16")]
14317#[cfg_attr(test, assert_instr(vcvtsh2si))]
14318#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14319pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
14320 unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14321}
14322
14323/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14324/// the result in dst.
14325///
14326/// Rounding is done according to the rounding parameter, which can be one of:
14327///
14328/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14329/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14330/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14331/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14332/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14333///
14334/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
14335#[inline]
14336#[target_feature(enable = "avx512fp16")]
14337#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
14338#[rustc_legacy_const_generics(1)]
14339#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14340pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
14341 unsafe {
14342 static_assert_rounding!(ROUNDING);
14343 vcvtsh2si32(a, ROUNDING)
14344 }
14345}
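
// Illustrative usage sketch (not part of the original crate): selecting an explicit
// rounding mode for the scalar conversion, assuming the `_mm_set_sh` helper from this module.
//
//     let a = _mm_set_sh(2.5);
//     let nearest = _mm_cvtsh_i32(a); // 2 under the default round-to-nearest-even mode
//     let up = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); // 3
//     let down = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); // 2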
14346
14347/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store the
14348/// results in dst.
14349///
14350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
14351#[inline]
14352#[target_feature(enable = "avx512fp16,avx512vl")]
14353#[cfg_attr(test, assert_instr(vcvtph2udq))]
14354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14355pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
14356 _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
14357}
14358
14359/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14360/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14361///
14362/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
14363#[inline]
14364#[target_feature(enable = "avx512fp16,avx512vl")]
14365#[cfg_attr(test, assert_instr(vcvtph2udq))]
14366#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14367pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14368 unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) }
14369}
14370
14371/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14372/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14373///
14374/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
14375#[inline]
14376#[target_feature(enable = "avx512fp16,avx512vl")]
14377#[cfg_attr(test, assert_instr(vcvtph2udq))]
14378#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14379pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14380 _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
14381}
14382
14383/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14384/// the results in dst.
14385///
14386/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
14387#[inline]
14388#[target_feature(enable = "avx512fp16,avx512vl")]
14389#[cfg_attr(test, assert_instr(vcvtph2udq))]
14390#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14391pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
14392 _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
14393}
14394
14395/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14396/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14397///
14398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
14399#[inline]
14400#[target_feature(enable = "avx512fp16,avx512vl")]
14401#[cfg_attr(test, assert_instr(vcvtph2udq))]
14402#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14403pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14404 unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) }
14405}
14406
14407/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14408/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14409///
14410/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
14411#[inline]
14412#[target_feature(enable = "avx512fp16,avx512vl")]
14413#[cfg_attr(test, assert_instr(vcvtph2udq))]
14414#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14415pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14416 _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
14417}
14418
14419/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14420/// the results in dst.
14421///
14422/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
14423#[inline]
14424#[target_feature(enable = "avx512fp16")]
14425#[cfg_attr(test, assert_instr(vcvtph2udq))]
14426#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14427pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
14428 _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
14429}
14430
14431/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14432/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14433///
14434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
14435#[inline]
14436#[target_feature(enable = "avx512fp16")]
14437#[cfg_attr(test, assert_instr(vcvtph2udq))]
14438#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14439pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14440 unsafe {
14441 transmute(vcvtph2udq_512(
14442 a,
14443 src.as_u32x16(),
14444 k,
14445 _MM_FROUND_CUR_DIRECTION,
14446 ))
14447 }
14448}
14449
14450/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14451/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14452///
14453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
14454#[inline]
14455#[target_feature(enable = "avx512fp16")]
14456#[cfg_attr(test, assert_instr(vcvtph2udq))]
14457#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14458pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14459 _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
14460}
14461
14462/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14463/// the results in dst.
14464///
14465/// Rounding is done according to the rounding parameter, which can be one of:
14466///
14467/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14468/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14469/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14470/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14471/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14472///
14473/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
14474#[inline]
14475#[target_feature(enable = "avx512fp16")]
14476#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
14477#[rustc_legacy_const_generics(1)]
14478#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14479pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14480 static_assert_rounding!(ROUNDING);
14481 _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
14482}
14483
14484/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14485/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14486///
14487/// Rounding is done according to the rounding parameter, which can be one of:
14488///
14489/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14490/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14491/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14492/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14493/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14494///
14495/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
14496#[inline]
14497#[target_feature(enable = "avx512fp16")]
14498#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
14499#[rustc_legacy_const_generics(3)]
14500#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14501pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
14502 src: __m512i,
14503 k: __mmask16,
14504 a: __m256h,
14505) -> __m512i {
14506 unsafe {
14507 static_assert_rounding!(ROUNDING);
14508 transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
14509 }
14510}
14511
14512/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14513/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14514///
14515/// Rounding is done according to the rounding parameter, which can be one of:
14516///
14517/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14518/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14519/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14520/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14521/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14522///
14523/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
14524#[inline]
14525#[target_feature(enable = "avx512fp16")]
14526#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
14527#[rustc_legacy_const_generics(2)]
14528#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14529pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14530 static_assert_rounding!(ROUNDING);
14531 _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
14532}
14533
14534/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
14535/// the result in dst.
14536///
14537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
14538#[inline]
14539#[target_feature(enable = "avx512fp16")]
14540#[cfg_attr(test, assert_instr(vcvtsh2usi))]
14541#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14542pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
14543 unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
14544}
14545
14546/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
14547/// the result in dst.
14548///
14549/// Rounding is done according to the rounding parameter, which can be one of:
14550///
14551/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14552/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14553/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14554/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14555/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14556///
14557/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
14558#[inline]
14559#[target_feature(enable = "avx512fp16")]
14560#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
14561#[rustc_legacy_const_generics(1)]
14562#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14563pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 {
14564 unsafe {
14565 static_assert_rounding!(ROUNDING);
14566 vcvtsh2usi32(a, ROUNDING)
14567 }
14568}
14569
14570/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14571/// store the results in dst.
14572///
14573/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
14574#[inline]
14575#[target_feature(enable = "avx512fp16,avx512vl")]
14576#[cfg_attr(test, assert_instr(vcvttph2dq))]
14577#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14578pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
14579 _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
14580}
14581
14582/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14583/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14584///
14585/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
14586#[inline]
14587#[target_feature(enable = "avx512fp16,avx512vl")]
14588#[cfg_attr(test, assert_instr(vcvttph2dq))]
14589#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14590pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14591 unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) }
14592}
14593
14594/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14595/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14596///
14597/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
14598#[inline]
14599#[target_feature(enable = "avx512fp16,avx512vl")]
14600#[cfg_attr(test, assert_instr(vcvttph2dq))]
14601#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14602pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
14603 _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a)
14604}
14605
14606/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14607/// store the results in dst.
14608///
14609/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
14610#[inline]
14611#[target_feature(enable = "avx512fp16,avx512vl")]
14612#[cfg_attr(test, assert_instr(vcvttph2dq))]
14613#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14614pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
14615 _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a)
14616}
14617
14618/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14619/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14620///
14621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
14622#[inline]
14623#[target_feature(enable = "avx512fp16,avx512vl")]
14624#[cfg_attr(test, assert_instr(vcvttph2dq))]
14625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14626pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14627 unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) }
14628}
14629
14630/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14631/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14632///
14633/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
14634#[inline]
14635#[target_feature(enable = "avx512fp16,avx512vl")]
14636#[cfg_attr(test, assert_instr(vcvttph2dq))]
14637#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14638pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
14639 _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a)
14640}
14641
14642/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14643/// store the results in dst.
14644///
14645/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
14646#[inline]
14647#[target_feature(enable = "avx512fp16")]
14648#[cfg_attr(test, assert_instr(vcvttph2dq))]
14649#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14650pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
14651 _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a)
14652}
14653
14654/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14655/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14656///
14657/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
14658#[inline]
14659#[target_feature(enable = "avx512fp16")]
14660#[cfg_attr(test, assert_instr(vcvttph2dq))]
14661#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14662pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14663 unsafe {
14664 transmute(vcvttph2dq_512(
14665 a,
14666 src.as_i32x16(),
14667 k,
14668 _MM_FROUND_CUR_DIRECTION,
14669 ))
14670 }
14671}
14672
14673/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14674/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14675///
14676/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
14677#[inline]
14678#[target_feature(enable = "avx512fp16")]
14679#[cfg_attr(test, assert_instr(vcvttph2dq))]
14680#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14681pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
14682 _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
14683}
14684
14685/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14686/// store the results in dst.
14687///
14688/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14689///
14690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
14691#[inline]
14692#[target_feature(enable = "avx512fp16")]
14693#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
14694#[rustc_legacy_const_generics(1)]
14695#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14696pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
14697 static_assert_sae!(SAE);
14698 _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
14699}
14700
14701/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14702/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14703///
14704/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14705///
14706/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
14707#[inline]
14708#[target_feature(enable = "avx512fp16")]
14709#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
14710#[rustc_legacy_const_generics(3)]
14711#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14712pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
14713 src: __m512i,
14714 k: __mmask16,
14715 a: __m256h,
14716) -> __m512i {
14717 unsafe {
14718 static_assert_sae!(SAE);
14719 transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
14720 }
14721}
14722
14723/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14724/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14725///
14726/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14727///
14728/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
14729#[inline]
14730#[target_feature(enable = "avx512fp16")]
14731#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
14732#[rustc_legacy_const_generics(2)]
14733#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14734pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
14735 static_assert_sae!(SAE);
14736 _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
14737}
14738
14739/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
14740/// the result in dst.
14741///
14742/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
14743#[inline]
14744#[target_feature(enable = "avx512fp16")]
14745#[cfg_attr(test, assert_instr(vcvttsh2si))]
14746#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14747pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
14748 unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14749}
14750
14751/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
14752/// the result in dst.
14753///
14754/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14755///
14756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
14757#[inline]
14758#[target_feature(enable = "avx512fp16")]
14759#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
14760#[rustc_legacy_const_generics(1)]
14761#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14762pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
14763 unsafe {
14764 static_assert_sae!(SAE);
14765 vcvttsh2si32(a, SAE)
14766 }
14767}
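
// Illustrative usage sketch (not part of the original crate): truncation versus the
// current rounding mode, assuming the `_mm_set_sh` helper from this module and default MXCSR.
//
//     let a = _mm_set_sh(-1.9);
//     let t = _mm_cvttsh_i32(a); // -1: truncation always chops toward zero
//     let n = _mm_cvtsh_i32(a);  // -2 under the default round-to-nearest mode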
14768
14769/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14770/// store the results in dst.
14771///
14772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
14773#[inline]
14774#[target_feature(enable = "avx512fp16,avx512vl")]
14775#[cfg_attr(test, assert_instr(vcvttph2udq))]
14776#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14777pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
14778 _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
14779}
14780
14781/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14782/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14783///
14784/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
14785#[inline]
14786#[target_feature(enable = "avx512fp16,avx512vl")]
14787#[cfg_attr(test, assert_instr(vcvttph2udq))]
14788#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14789pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14790 unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) }
14791}
14792
14793/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14794/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14795///
14796/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
14797#[inline]
14798#[target_feature(enable = "avx512fp16,avx512vl")]
14799#[cfg_attr(test, assert_instr(vcvttph2udq))]
14800#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14801pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14802 _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a)
14803}
14804
14805/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14806/// store the results in dst.
14807///
14808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
14809#[inline]
14810#[target_feature(enable = "avx512fp16,avx512vl")]
14811#[cfg_attr(test, assert_instr(vcvttph2udq))]
14812#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14813pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
14814 _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a)
14815}
14816
14817/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14818/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14819///
14820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
14821#[inline]
14822#[target_feature(enable = "avx512fp16,avx512vl")]
14823#[cfg_attr(test, assert_instr(vcvttph2udq))]
14824#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14825pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14826 unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) }
14827}
14828
14829/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14830/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14831///
14832/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
14833#[inline]
14834#[target_feature(enable = "avx512fp16,avx512vl")]
14835#[cfg_attr(test, assert_instr(vcvttph2udq))]
14836#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14837pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14838 _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a)
14839}
14840
14841/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14842/// store the results in dst.
14843///
14844/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
14845#[inline]
14846#[target_feature(enable = "avx512fp16")]
14847#[cfg_attr(test, assert_instr(vcvttph2udq))]
14848#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14849pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
14850 _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a)
14851}
14852
14853/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14854/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14855///
14856/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
14857#[inline]
14858#[target_feature(enable = "avx512fp16")]
14859#[cfg_attr(test, assert_instr(vcvttph2udq))]
14860#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14861pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14862 unsafe {
14863 transmute(vcvttph2udq_512(
14864 a,
14865 src.as_u32x16(),
14866 k,
14867 _MM_FROUND_CUR_DIRECTION,
14868 ))
14869 }
14870}
14871
14872/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14873/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14874///
14875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
14876#[inline]
14877#[target_feature(enable = "avx512fp16")]
14878#[cfg_attr(test, assert_instr(vcvttph2udq))]
14879#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14880pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14881 _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a)
14882}
14883
14884/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14885/// store the results in dst.
14886///
14887/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14888///
14889/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
14890#[inline]
14891#[target_feature(enable = "avx512fp16")]
14892#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14893#[rustc_legacy_const_generics(1)]
14894#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14895pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
14896 static_assert_sae!(SAE);
14897 _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
14898}
14899
14900/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14901/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14902///
14903/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14904///
14905/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
14906#[inline]
14907#[target_feature(enable = "avx512fp16")]
14908#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14909#[rustc_legacy_const_generics(3)]
14910#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14911pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
14912 src: __m512i,
14913 k: __mmask16,
14914 a: __m256h,
14915) -> __m512i {
14916 unsafe {
14917 static_assert_sae!(SAE);
14918 transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
14919 }
14920}
14921
14922/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14923/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14924///
14925/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14926///
14927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
14928#[inline]
14929#[target_feature(enable = "avx512fp16")]
14930#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14931#[rustc_legacy_const_generics(2)]
14932#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14933pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
14934 static_assert_sae!(SAE);
14935 _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
14936}
14937
14938/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
14939/// the result in dst.
14940///
14941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
14942#[inline]
14943#[target_feature(enable = "avx512fp16")]
14944#[cfg_attr(test, assert_instr(vcvttsh2usi))]
14945#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14946pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
14947 unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
14948}
14949
14950/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
14951/// the result in dst.
14952///
14953/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14954///
14955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
14956#[inline]
14957#[target_feature(enable = "avx512fp16")]
14958#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
14959#[rustc_legacy_const_generics(1)]
14960#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14961pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
14962 unsafe {
14963 static_assert_sae!(SAE);
14964 vcvttsh2usi32(a, SAE)
14965 }
14966}
14967
14968/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14969/// store the results in dst.
14970///
14971/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
14972#[inline]
14973#[target_feature(enable = "avx512fp16,avx512vl")]
14974#[cfg_attr(test, assert_instr(vcvtph2qq))]
14975#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14976pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
14977 _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a)
14978}
14979
14980/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14981/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14982///
14983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
14984#[inline]
14985#[target_feature(enable = "avx512fp16,avx512vl")]
14986#[cfg_attr(test, assert_instr(vcvtph2qq))]
14987#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14988pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14989 unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) }
14990}
14991
14992/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14993/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14994///
14995/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
14996#[inline]
14997#[target_feature(enable = "avx512fp16,avx512vl")]
14998#[cfg_attr(test, assert_instr(vcvtph2qq))]
14999#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15000pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
15001 _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a)
15002}
15003
15004/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15005/// store the results in dst.
15006///
15007/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
15008#[inline]
15009#[target_feature(enable = "avx512fp16,avx512vl")]
15010#[cfg_attr(test, assert_instr(vcvtph2qq))]
15011#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15012pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
15013 _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a)
15014}
15015
15016/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15017/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15018///
15019/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
15020#[inline]
15021#[target_feature(enable = "avx512fp16,avx512vl")]
15022#[cfg_attr(test, assert_instr(vcvtph2qq))]
15023#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15024pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15025 unsafe { transmute(src:vcvtph2qq_256(a, src.as_i64x4(), k)) }
15026}
15027
15028/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15029/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15030///
15031/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
15032#[inline]
15033#[target_feature(enable = "avx512fp16,avx512vl")]
15034#[cfg_attr(test, assert_instr(vcvtph2qq))]
15035#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15036pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15037 _mm256_mask_cvtph_epi64(src:_mm256_setzero_si256(), k, a)
15038}
15039
15040/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15041/// store the results in dst.
15042///
15043/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
15044#[inline]
15045#[target_feature(enable = "avx512fp16")]
15046#[cfg_attr(test, assert_instr(vcvtph2qq))]
15047#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15048pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
15049 _mm512_mask_cvtph_epi64(src:_mm512_undefined_epi32(), k:0xff, a)
15050}
15051
15052/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15053/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15054///
15055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
15056#[inline]
15057#[target_feature(enable = "avx512fp16")]
15058#[cfg_attr(test, assert_instr(vcvtph2qq))]
15059#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15060pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15061 unsafe {
15062 transmute(src:vcvtph2qq_512(
15063 a,
15064 src.as_i64x8(),
15065 k,
15066 _MM_FROUND_CUR_DIRECTION,
15067 ))
15068 }
15069}
15070
15071/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15072/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15073///
15074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
15075#[inline]
15076#[target_feature(enable = "avx512fp16")]
15077#[cfg_attr(test, assert_instr(vcvtph2qq))]
15078#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15079pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15080 _mm512_mask_cvtph_epi64(src:_mm512_setzero_si512(), k, a)
15081}
15082
15083/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15084/// store the results in dst.
15085///
15086/// Rounding is done according to the rounding parameter, which can be one of:
15087///
15088/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15089/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15090/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15091/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15092/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15093///
15094/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
15095#[inline]
15096#[target_feature(enable = "avx512fp16")]
15097#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15098#[rustc_legacy_const_generics(1)]
15099#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15100pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15101 static_assert_rounding!(ROUNDING);
15102 _mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xff, a)
15103}
15104
15105/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15106/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15107///
15108/// Rounding is done according to the rounding parameter, which can be one of:
15109///
15110/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15111/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15112/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15113/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15114/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15115///
15116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
15117#[inline]
15118#[target_feature(enable = "avx512fp16")]
15119#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15120#[rustc_legacy_const_generics(3)]
15121#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15122pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
15123 src: __m512i,
15124 k: __mmask8,
15125 a: __m128h,
15126) -> __m512i {
15127 unsafe {
15128 static_assert_rounding!(ROUNDING);
15129 transmute(src:vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15130 }
15131}
15132
15133/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15134/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15135///
15136/// Rounding is done according to the rounding parameter, which can be one of:
15137///
15138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15143///
15144/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
15145#[inline]
15146#[target_feature(enable = "avx512fp16")]
15147#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15148#[rustc_legacy_const_generics(2)]
15149#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15150pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15151 static_assert_rounding!(ROUNDING);
15152 _mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15153}
15154
15155/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15156/// store the results in dst.
15157///
15158/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
15159#[inline]
15160#[target_feature(enable = "avx512fp16,avx512vl")]
15161#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15163pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
15164 _mm_mask_cvtph_epu64(src:_mm_undefined_si128(), k:0xff, a)
15165}
15166
15167/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15168/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15169///
15170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
15171#[inline]
15172#[target_feature(enable = "avx512fp16,avx512vl")]
15173#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15174#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15175pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15176 unsafe { transmute(src:vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15177}
15178
15179/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15180/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15181///
15182/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
15183#[inline]
15184#[target_feature(enable = "avx512fp16,avx512vl")]
15185#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15186#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15187pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15188 _mm_mask_cvtph_epu64(src:_mm_setzero_si128(), k, a)
15189}
15190
15191/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15192/// store the results in dst.
15193///
15194/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
15195#[inline]
15196#[target_feature(enable = "avx512fp16,avx512vl")]
15197#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15198#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15199pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
15200 _mm256_mask_cvtph_epu64(src:_mm256_undefined_si256(), k:0xff, a)
15201}
15202
15203/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15204/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15205///
15206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
15207#[inline]
15208#[target_feature(enable = "avx512fp16,avx512vl")]
15209#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15210#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15211pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15212 unsafe { transmute(src:vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15213}
15214
15215/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15216/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15217///
15218/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
15219#[inline]
15220#[target_feature(enable = "avx512fp16,avx512vl")]
15221#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15222#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15223pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15224 _mm256_mask_cvtph_epu64(src:_mm256_setzero_si256(), k, a)
15225}
15226
15227/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15228/// store the results in dst.
15229///
15230/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
15231#[inline]
15232#[target_feature(enable = "avx512fp16")]
15233#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15234#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15235pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
15236 _mm512_mask_cvtph_epu64(src:_mm512_undefined_epi32(), k:0xff, a)
15237}
15238
15239/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15240/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15241///
15242/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
15243#[inline]
15244#[target_feature(enable = "avx512fp16")]
15245#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15246#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15247pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15248 unsafe {
15249 transmute(src:vcvtph2uqq_512(
15250 a,
15251 src.as_u64x8(),
15252 k,
15253 _MM_FROUND_CUR_DIRECTION,
15254 ))
15255 }
15256}
15257
15258/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15259/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15260///
15261/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
15262#[inline]
15263#[target_feature(enable = "avx512fp16")]
15264#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15265#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15266pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15267 _mm512_mask_cvtph_epu64(src:_mm512_setzero_si512(), k, a)
15268}
15269
15270/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15271/// store the results in dst.
15272///
15273/// Rounding is done according to the rounding parameter, which can be one of:
15274///
15275/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15276/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15277/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15278/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15279/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15280///
15281/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
15282#[inline]
15283#[target_feature(enable = "avx512fp16")]
15284#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15285#[rustc_legacy_const_generics(1)]
15286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15287pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15288 static_assert_rounding!(ROUNDING);
15289 _mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xff, a)
15290}
15291
15292/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15293/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15294///
15295/// Rounding is done according to the rounding parameter, which can be one of:
15296///
15297/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15298/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15299/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15300/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15301/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15302///
15303/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
15304#[inline]
15305#[target_feature(enable = "avx512fp16")]
15306#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15307#[rustc_legacy_const_generics(3)]
15308#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15309pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
15310 src: __m512i,
15311 k: __mmask8,
15312 a: __m128h,
15313) -> __m512i {
15314 unsafe {
15315 static_assert_rounding!(ROUNDING);
15316 transmute(src:vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15317 }
15318}
15319
15320/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15321/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15322///
15323/// Rounding is done according to the rounding parameter, which can be one of:
15324///
15325/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15326/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15327/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15328/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15329/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15330///
15331/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
15332#[inline]
15333#[target_feature(enable = "avx512fp16")]
15334#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15335#[rustc_legacy_const_generics(2)]
15336#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15337pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15338 static_assert_rounding!(ROUNDING);
15339 _mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15340}
15341
15342/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15343/// store the results in dst.
15344///
15345/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
15346#[inline]
15347#[target_feature(enable = "avx512fp16,avx512vl")]
15348#[cfg_attr(test, assert_instr(vcvttph2qq))]
15349#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15350pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
15351 _mm_mask_cvttph_epi64(src:_mm_undefined_si128(), k:0xff, a)
15352}
15353
15354/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15355/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15356///
15357/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
15358#[inline]
15359#[target_feature(enable = "avx512fp16,avx512vl")]
15360#[cfg_attr(test, assert_instr(vcvttph2qq))]
15361#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15362pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15363 unsafe { transmute(src:vcvttph2qq_128(a, src.as_i64x2(), k)) }
15364}
15365
15366/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15367/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15368///
15369/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
15370#[inline]
15371#[target_feature(enable = "avx512fp16,avx512vl")]
15372#[cfg_attr(test, assert_instr(vcvttph2qq))]
15373#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15374pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
15375 _mm_mask_cvttph_epi64(src:_mm_setzero_si128(), k, a)
15376}
15377
15378/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15379/// store the results in dst.
15380///
15381/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
15382#[inline]
15383#[target_feature(enable = "avx512fp16,avx512vl")]
15384#[cfg_attr(test, assert_instr(vcvttph2qq))]
15385#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15386pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
15387 _mm256_mask_cvttph_epi64(src:_mm256_undefined_si256(), k:0xff, a)
15388}
15389
15390/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15391/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15392///
15393/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
15394#[inline]
15395#[target_feature(enable = "avx512fp16,avx512vl")]
15396#[cfg_attr(test, assert_instr(vcvttph2qq))]
15397#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15398pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15399 unsafe { transmute(src:vcvttph2qq_256(a, src.as_i64x4(), k)) }
15400}
15401
15402/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15403/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15404///
15405/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
15406#[inline]
15407#[target_feature(enable = "avx512fp16,avx512vl")]
15408#[cfg_attr(test, assert_instr(vcvttph2qq))]
15409#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15410pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15411 _mm256_mask_cvttph_epi64(src:_mm256_setzero_si256(), k, a)
15412}
15413
15414/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15415/// store the results in dst.
15416///
15417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
15418#[inline]
15419#[target_feature(enable = "avx512fp16")]
15420#[cfg_attr(test, assert_instr(vcvttph2qq))]
15421#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15422pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
15423 _mm512_mask_cvttph_epi64(src:_mm512_undefined_epi32(), k:0xff, a)
15424}
15425
15426/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15427/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15428///
15429/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
15430#[inline]
15431#[target_feature(enable = "avx512fp16")]
15432#[cfg_attr(test, assert_instr(vcvttph2qq))]
15433#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15434pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15435 unsafe {
15436 transmute(src:vcvttph2qq_512(
15437 a,
15438 src.as_i64x8(),
15439 k,
15440 _MM_FROUND_CUR_DIRECTION,
15441 ))
15442 }
15443}
15444
15445/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15446/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15447///
15448/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
15449#[inline]
15450#[target_feature(enable = "avx512fp16")]
15451#[cfg_attr(test, assert_instr(vcvttph2qq))]
15452#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15453pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15454 _mm512_mask_cvttph_epi64(src:_mm512_setzero_si512(), k, a)
15455}
15456
15457/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15458/// store the results in dst.
15459///
15460/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15461///
15462/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
15463#[inline]
15464#[target_feature(enable = "avx512fp16")]
15465#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15466#[rustc_legacy_const_generics(1)]
15467#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15468pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
15469 static_assert_sae!(SAE);
15470 _mm512_mask_cvtt_roundph_epi64::<SAE>(src:_mm512_undefined_epi32(), k:0xff, a)
15471}
15472
15473/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15474/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15475///
15476/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15477///
15478/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
15479#[inline]
15480#[target_feature(enable = "avx512fp16")]
15481#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15482#[rustc_legacy_const_generics(3)]
15483#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15484pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
15485 src: __m512i,
15486 k: __mmask8,
15487 a: __m128h,
15488) -> __m512i {
15489 unsafe {
15490 static_assert_sae!(SAE);
15491 transmute(src:vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
15492 }
15493}
15494
15495/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15496/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15497///
15498/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15499///
15500/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
15501#[inline]
15502#[target_feature(enable = "avx512fp16")]
15503#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15504#[rustc_legacy_const_generics(2)]
15505#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15506pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15507 static_assert_sae!(SAE);
15508 _mm512_mask_cvtt_roundph_epi64::<SAE>(src:_mm512_setzero_si512(), k, a)
15509}
15510
15511/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15512/// store the results in dst.
15513///
15514/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
15515#[inline]
15516#[target_feature(enable = "avx512fp16,avx512vl")]
15517#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15518#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15519pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
15520 _mm_mask_cvttph_epu64(src:_mm_undefined_si128(), k:0xff, a)
15521}
15522
15523/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15524/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15525///
15526/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
15527#[inline]
15528#[target_feature(enable = "avx512fp16,avx512vl")]
15529#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15530#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15531pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15532 unsafe { transmute(src:vcvttph2uqq_128(a, src.as_u64x2(), k)) }
15533}
15534
15535/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15536/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15537///
15538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
15539#[inline]
15540#[target_feature(enable = "avx512fp16,avx512vl")]
15541#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15542#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15543pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15544 _mm_mask_cvttph_epu64(src:_mm_setzero_si128(), k, a)
15545}
15546
15547/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15548/// store the results in dst.
15549///
15550/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
15551#[inline]
15552#[target_feature(enable = "avx512fp16,avx512vl")]
15553#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15554#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15555pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
15556 _mm256_mask_cvttph_epu64(src:_mm256_undefined_si256(), k:0xff, a)
15557}
15558
15559/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15560/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15561///
15562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
15563#[inline]
15564#[target_feature(enable = "avx512fp16,avx512vl")]
15565#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15566#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15567pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15568 unsafe { transmute(src:vcvttph2uqq_256(a, src.as_u64x4(), k)) }
15569}
15570
15571/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15572/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15573///
15574/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
15575#[inline]
15576#[target_feature(enable = "avx512fp16,avx512vl")]
15577#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15578#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15579pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15580 _mm256_mask_cvttph_epu64(src:_mm256_setzero_si256(), k, a)
15581}
15582
15583/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15584/// store the results in dst.
15585///
15586/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
15587#[inline]
15588#[target_feature(enable = "avx512fp16")]
15589#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15590#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15591pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
15592 _mm512_mask_cvttph_epu64(src:_mm512_undefined_epi32(), k:0xff, a)
15593}
15594
15595/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15596/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15597///
15598/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
15599#[inline]
15600#[target_feature(enable = "avx512fp16")]
15601#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15602#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15603pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15604 unsafe {
15605 transmute(src:vcvttph2uqq_512(
15606 a,
15607 src.as_u64x8(),
15608 k,
15609 _MM_FROUND_CUR_DIRECTION,
15610 ))
15611 }
15612}
15613
15614/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15615/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15616///
15617/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
15618#[inline]
15619#[target_feature(enable = "avx512fp16")]
15620#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15621#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15622pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15623 _mm512_mask_cvttph_epu64(src:_mm512_setzero_si512(), k, a)
15624}
15625
15626/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15627/// store the results in dst.
15628///
15629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15630///
15631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
15632#[inline]
15633#[target_feature(enable = "avx512fp16")]
15634#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15635#[rustc_legacy_const_generics(1)]
15636#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15637pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
15638 static_assert_sae!(SAE);
15639 _mm512_mask_cvtt_roundph_epu64::<SAE>(src:_mm512_undefined_epi32(), k:0xff, a)
15640}
15641
15642/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15643/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15644///
15645/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15646///
15647/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
15648#[inline]
15649#[target_feature(enable = "avx512fp16")]
15650#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15651#[rustc_legacy_const_generics(3)]
15652#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15653pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
15654 src: __m512i,
15655 k: __mmask8,
15656 a: __m128h,
15657) -> __m512i {
15658 unsafe {
15659 static_assert_sae!(SAE);
15660 transmute(src:vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15661 }
15662}
15663
15664/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15665/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15666///
15667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15668///
15669/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
15670#[inline]
15671#[target_feature(enable = "avx512fp16")]
15672#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15673#[rustc_legacy_const_generics(2)]
15674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15675pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15676 static_assert_sae!(SAE);
15677 _mm512_mask_cvtt_roundph_epu64::<SAE>(src:_mm512_setzero_si512(), k, a)
15678}
15679
15680/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15681/// floating-point elements, and store the results in dst.
15682///
15683/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
15684#[inline]
15685#[target_feature(enable = "avx512fp16,avx512vl")]
15686#[cfg_attr(test, assert_instr(vcvtph2psx))]
15687#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15688pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
15689 _mm_mask_cvtxph_ps(src:_mm_setzero_ps(), k:0xff, a)
15690}
15691
15692/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15693/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15694/// dst when the corresponding mask bit is not set).
15695///
15696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
15697#[inline]
15698#[target_feature(enable = "avx512fp16,avx512vl")]
15699#[cfg_attr(test, assert_instr(vcvtph2psx))]
15700#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15701pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
15702 unsafe { vcvtph2psx_128(a, src, k) }
15703}
15704
15705/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15706/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15707/// corresponding mask bit is not set).
15708///
15709/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
15710#[inline]
15711#[target_feature(enable = "avx512fp16,avx512vl")]
15712#[cfg_attr(test, assert_instr(vcvtph2psx))]
15713#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15714pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
15715 _mm_mask_cvtxph_ps(src:_mm_setzero_ps(), k, a)
15716}
15717
15718/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15719/// floating-point elements, and store the results in dst.
15720///
15721/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
15722#[inline]
15723#[target_feature(enable = "avx512fp16,avx512vl")]
15724#[cfg_attr(test, assert_instr(vcvtph2psx))]
15725#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15726pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
15727 _mm256_mask_cvtxph_ps(src:_mm256_setzero_ps(), k:0xff, a)
15728}
15729
15730/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15731/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15732/// dst when the corresponding mask bit is not set).
15733///
15734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
15735#[inline]
15736#[target_feature(enable = "avx512fp16,avx512vl")]
15737#[cfg_attr(test, assert_instr(vcvtph2psx))]
15738#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15739pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
15740 unsafe { vcvtph2psx_256(a, src, k) }
15741}
15742
15743/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15744/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15745/// corresponding mask bit is not set).
15746///
15747/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
15748#[inline]
15749#[target_feature(enable = "avx512fp16,avx512vl")]
15750#[cfg_attr(test, assert_instr(vcvtph2psx))]
15751#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15752pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
15753 _mm256_mask_cvtxph_ps(src:_mm256_setzero_ps(), k, a)
15754}
15755
15756/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15757/// floating-point elements, and store the results in dst.
15758///
15759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
15760#[inline]
15761#[target_feature(enable = "avx512fp16")]
15762#[cfg_attr(test, assert_instr(vcvtph2psx))]
15763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15764pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
15765 _mm512_mask_cvtxph_ps(src:_mm512_setzero_ps(), k:0xffff, a)
15766}
15767
15768/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15769/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15770/// dst when the corresponding mask bit is not set).
15771///
15772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
15773#[inline]
15774#[target_feature(enable = "avx512fp16")]
15775#[cfg_attr(test, assert_instr(vcvtph2psx))]
15776#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15777pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
15778 unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
15779}
15780
15781/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15782/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15783/// corresponding mask bit is not set).
15784///
15785/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
15786#[inline]
15787#[target_feature(enable = "avx512fp16")]
15788#[cfg_attr(test, assert_instr(vcvtph2psx))]
15789#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15790pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
15791 _mm512_mask_cvtxph_ps(src:_mm512_setzero_ps(), k, a)
15792}
15793
15794/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15795/// floating-point elements, and store the results in dst.
15796///
15797/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15798///
15799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
15800#[inline]
15801#[target_feature(enable = "avx512fp16")]
15802#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15803#[rustc_legacy_const_generics(1)]
15804#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15805pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
15806 static_assert_sae!(SAE);
15807 _mm512_mask_cvtx_roundph_ps::<SAE>(src:_mm512_setzero_ps(), k:0xffff, a)
15808}
15809
15810/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15811/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15812/// dst when the corresponding mask bit is not set).
15813///
15814/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15815///
15816/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
15817#[inline]
15818#[target_feature(enable = "avx512fp16")]
15819#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15820#[rustc_legacy_const_generics(3)]
15821#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15822pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
15823 src: __m512,
15824 k: __mmask16,
15825 a: __m256h,
15826) -> __m512 {
15827 unsafe {
15828 static_assert_sae!(SAE);
15829 vcvtph2psx_512(a, src, k, SAE)
15830 }
15831}
15832
15833/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15834/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15835/// corresponding mask bit is not set).
15836///
15837/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15838///
15839/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
15840#[inline]
15841#[target_feature(enable = "avx512fp16")]
15842#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15843#[rustc_legacy_const_generics(2)]
15844#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15845pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
15846 static_assert_sae!(SAE);
15847 _mm512_mask_cvtx_roundph_ps::<SAE>(src:_mm512_setzero_ps(), k, a)
15848}
15849
15850/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15851/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed
15852/// elements from a to the upper elements of dst.
15853///
15854/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
15855#[inline]
15856#[target_feature(enable = "avx512fp16")]
15857#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15858#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15859pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
15860 _mm_mask_cvtsh_ss(src:a, k:0xff, a, b)
15861}
15862
15863/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15864/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15865/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15866/// upper elements of dst.
15867///
15868/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
15869#[inline]
15870#[target_feature(enable = "avx512fp16")]
15871#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15872#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15873pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15874 unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
15875}
15876
15877/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15878/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15879/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15880/// of dst.
15881///
15882/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
15883#[inline]
15884#[target_feature(enable = "avx512fp16")]
15885#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15886#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15887pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15888 _mm_mask_cvtsh_ss(src:_mm_setzero_ps(), k, a, b)
15889}
15890
15891/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15892/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements
15893/// from a to the upper elements of dst.
15894///
15895/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15896///
15897/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
15898#[inline]
15899#[target_feature(enable = "avx512fp16")]
15900#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15901#[rustc_legacy_const_generics(2)]
15902#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15903pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
15904 static_assert_sae!(SAE);
15905 _mm_mask_cvt_roundsh_ss::<SAE>(src:_mm_undefined_ps(), k:0xff, a, b)
15906}
15907
15908/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15909/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15910/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15911/// upper elements of dst.
15912///
15913/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15914///
15915/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
15916#[inline]
15917#[target_feature(enable = "avx512fp16")]
15918#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15919#[rustc_legacy_const_generics(4)]
15920#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15921pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
15922 src: __m128,
15923 k: __mmask8,
15924 a: __m128,
15925 b: __m128h,
15926) -> __m128 {
15927 unsafe {
15928 static_assert_sae!(SAE);
15929 vcvtsh2ss(a, b, src, k, SAE)
15930 }
15931}
15932
15933/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15934/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15935/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15936/// of dst.
15937///
15938/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15939///
15940/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
15941#[inline]
15942#[target_feature(enable = "avx512fp16")]
15943#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15944#[rustc_legacy_const_generics(3)]
15945#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15946pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15947 static_assert_sae!(SAE);
15948 _mm_mask_cvt_roundsh_ss::<SAE>(src:_mm_setzero_ps(), k, a, b)
15949}
15950
15951/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15952/// floating-point elements, and store the results in dst.
15953///
15954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
15955#[inline]
15956#[target_feature(enable = "avx512fp16,avx512vl")]
15957#[cfg_attr(test, assert_instr(vcvtph2pd))]
15958#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15959pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
15960 _mm_mask_cvtph_pd(src:_mm_setzero_pd(), k:0xff, a)
15961}
15962
15963/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15964/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15965/// dst when the corresponding mask bit is not set).
15966///
15967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
15968#[inline]
15969#[target_feature(enable = "avx512fp16,avx512vl")]
15970#[cfg_attr(test, assert_instr(vcvtph2pd))]
15971#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15972pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
15973 unsafe { vcvtph2pd_128(a, src, k) }
15974}
15975
15976/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15977/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15978/// corresponding mask bit is not set).
15979///
15980/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
15981#[inline]
15982#[target_feature(enable = "avx512fp16,avx512vl")]
15983#[cfg_attr(test, assert_instr(vcvtph2pd))]
15984#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15985pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
15986 _mm_mask_cvtph_pd(src:_mm_setzero_pd(), k, a)
15987}
15988
15989/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15990/// floating-point elements, and store the results in dst.
15991///
15992/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
15993#[inline]
15994#[target_feature(enable = "avx512fp16,avx512vl")]
15995#[cfg_attr(test, assert_instr(vcvtph2pd))]
15996#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15997pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
15998 _mm256_mask_cvtph_pd(src:_mm256_setzero_pd(), k:0xff, a)
15999}
16000
16001/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16002/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16003/// dst when the corresponding mask bit is not set).
16004///
16005/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
16006#[inline]
16007#[target_feature(enable = "avx512fp16,avx512vl")]
16008#[cfg_attr(test, assert_instr(vcvtph2pd))]
16009#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16010pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
16011 unsafe { vcvtph2pd_256(a, src, k) }
16012}
16013
16014/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16015/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16016/// corresponding mask bit is not set).
16017///
16018/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
16019#[inline]
16020#[target_feature(enable = "avx512fp16,avx512vl")]
16021#[cfg_attr(test, assert_instr(vcvtph2pd))]
16022#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16023pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
16024 _mm256_mask_cvtph_pd(src:_mm256_setzero_pd(), k, a)
16025}
16026
16027/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16028/// floating-point elements, and store the results in dst.
16029///
16030/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
16031#[inline]
16032#[target_feature(enable = "avx512fp16")]
16033#[cfg_attr(test, assert_instr(vcvtph2pd))]
16034#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16035pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
16036 _mm512_mask_cvtph_pd(src:_mm512_setzero_pd(), k:0xff, a)
16037}
16038
16039/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16040/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16041/// dst when the corresponding mask bit is not set).
16042///
16043/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
16044#[inline]
16045#[target_feature(enable = "avx512fp16")]
16046#[cfg_attr(test, assert_instr(vcvtph2pd))]
16047#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16048pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
16049 unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
16050}
16051
16052/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16053/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16054/// corresponding mask bit is not set).
16055///
16056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
16057#[inline]
16058#[target_feature(enable = "avx512fp16")]
16059#[cfg_attr(test, assert_instr(vcvtph2pd))]
16060#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16061pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
16062 _mm512_mask_cvtph_pd(src:_mm512_setzero_pd(), k, a)
16063}
16064
/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
    static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
/// dst when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m128h,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        vcvtph2pd_512(a, src, k, SAE)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
    static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
}

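// Illustrative sketch: the `SAE` const generic accepts `_MM_FROUND_CUR_DIRECTION` or
// `_MM_FROUND_NO_EXC` (enforced by `static_assert_sae!`); `_MM_FROUND_NO_EXC` performs the
// same conversion while suppressing floating-point exception reporting.
//
//     let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
//     let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
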
/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper element
/// from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
    _mm_mask_cvtsh_sd(a, 0xff, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element is
/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
/// of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    _mm_mask_cvtsh_sd(_mm_setzero_pd(), k, a, b)
}

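// Illustrative sketch (hypothetical values): the scalar form converts only lane 0 of `b`
// from f16 to f64 and passes the upper lane of `a` through unchanged.
//
//     let a = _mm_set_pd(10.0, 20.0); // lane 1 = 10.0, lane 0 = 20.0
//     let b = _mm_set_sh(2.5);
//     let r = _mm_cvtsh_sd(a, b);     // lane 0 = 2.5 (from b), lane 1 = 10.0 (from a)
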
/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper element from a
/// to the upper element of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
    static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element is
/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
/// of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128h,
) -> __m128d {
    unsafe {
        static_assert_sae!(SAE);
        vcvtsh2sd(a, b, src, k, SAE)
    }
}

/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_sd::<SAE>(_mm_setzero_pd(), k, a, b)
}

/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtsh_h(a: __m128h) -> f16 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cvtsh_h(a: __m256h) -> f16 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvtsh_h(a: __m512h) -> f16 {
    unsafe { simd_extract!(a, 0) }
}

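// Illustrative sketch: `_mm_cvtsh_h` (and the 256/512-bit variants) simply read lane 0 of the
// vector as an `f16` scalar.
//
//     let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
//     let x: f16 = _mm_cvtsh_h(a); // 1.0
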
/// Copy the lower 16-bit integer in a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
    unsafe { simd_extract!(a.as_i16x8(), 0) }
}

/// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtsi16_si128(a: i16) -> __m128i {
    unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
}

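// Illustrative sketch: round-tripping a scalar through the lowest 16-bit lane.
// `_mm_cvtsi16_si128` zeroes the other seven lanes.
//
//     let v = _mm_cvtsi16_si128(42);
//     assert_eq!(_mm_cvtsi128_si16(v), 42);

// Raw bindings to the LLVM intrinsics that back the wrappers above; the `rounding` and `sae`
// parameters carry the `_MM_FROUND_*` constants forwarded by the `*_round` variants.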
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
    fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
    #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
    fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;

    #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
    fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
    fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
    fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
    fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
    fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
    fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
    fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
    fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
    fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
    fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
    fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
    fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
    fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
    fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
    fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
    fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
    fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
    fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
    fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
    fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
    fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
    fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
    fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
    fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
    fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
    fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
    fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
    fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
    fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
    -> __m512;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
    fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
    -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
    fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
    fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
    fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
    fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;

    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
    fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
    fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
    fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
    fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
    fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
    fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
    fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
    fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
    fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
    fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
    fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
    fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
    fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
    fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
    fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
    fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
    fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
    fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
    fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
    fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
    fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
    fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
    fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
    fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
    fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
    fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
    fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
    fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
    fn vgetmantsh(
        a: __m128h,
        b: __m128h,
        imm8: i32,
        src: __m128h,
        k: __mmask8,
        sae: i32,
    ) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
    fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
    fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
    fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
    fn vrndscalesh(
        a: __m128h,
        b: __m128h,
        src: __m128h,
        k: __mmask8,
        imm8: i32,
        sae: i32,
    ) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
    fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
    fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
    fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
    fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
    fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
    fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
    fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
    fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
    -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
    fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;

    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"]
    fn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"]
    fn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"]
    fn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u16"]
    fn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u16"]
    fn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32u16"]
    fn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"]
    fn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"]
    fn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"]
    fn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"]
    fn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"]
    fn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u32"]
    fn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16u32"]
    fn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"]
    fn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"]
    fn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"]
    fn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"]
    fn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"]
    fn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"]
    fn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8u64"]
    fn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128"]
    fn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"]
    fn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"]
    fn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"]
    fn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"]
    fn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"]
    fn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"]
    fn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
    fn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"]
    fn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"]
    fn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"]
    fn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"]
    fn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
    fn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
    fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, rounding: i32) -> u16x32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
    fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"]
    fn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"]
    fn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"]
    fn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"]
    fn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"]
    fn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"]
    fn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"]
    fn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"]
    fn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"]
    fn vcvtsh2si32(a: __m128h, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"]
    fn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"]
    fn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"]
    fn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"]
    fn vcvtsh2usi32(a: __m128h, sae: i32) -> u32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"]
    fn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"]
    fn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"]
    fn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"]
    fn vcvttsh2si32(a: __m128h, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"]
    fn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"]
    fn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"]
    fn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"]
    fn vcvttsh2usi32(a: __m128h, sae: i32) -> u32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"]
    fn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"]
    fn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"]
    fn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"]
    fn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"]
    fn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"]
    fn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"]
    fn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"]
    fn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"]
    fn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"]
    fn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256"]
    fn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"]
    fn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"]
    fn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"]
    fn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"]
    fn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"]
    fn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"]
    fn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"]
    fn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"]
    fn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"]
    fn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d;
}
16631
16632#[cfg(test)]
16633mod tests {
16634 use crate::core_arch::x86::*;
16635 use crate::mem::transmute;
16636 use crate::ptr::{addr_of, addr_of_mut};
16637 use stdarch_test::simd_test;
16638
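    // Helpers that splat an interleaved (re, im) half-precision pair across a whole vector;
    // intended for the complex-arithmetic (`pch`/`sch`) tests in this module.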
16639 #[target_feature(enable = "avx512fp16")]
16640 unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
16641 _mm_setr_ph(re, im, re, im, re, im, re, im)
16642 }
16643
16644 #[target_feature(enable = "avx512fp16")]
16645 unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
16646 _mm256_setr_ph(
16647 re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16648 )
16649 }
16650
16651 #[target_feature(enable = "avx512fp16")]
16652 unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
16653 _mm512_setr_ph(
16654 re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16655 re, im, re, im, re, im, re, im, re, im,
16656 )
16657 }
16658
16659 #[simd_test(enable = "avx512fp16")]
16660 unsafe fn test_mm_set_ph() {
16661 let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
16662 let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
16663 assert_eq_m128h(r, e);
16664 }
16665
16666 #[simd_test(enable = "avx512fp16")]
16667 unsafe fn test_mm256_set_ph() {
16668 let r = _mm256_set_ph(
16669 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16670 );
16671 let e = _mm256_setr_ph(
16672 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
16673 );
16674 assert_eq_m256h(r, e);
16675 }
16676
16677 #[simd_test(enable = "avx512fp16")]
16678 unsafe fn test_mm512_set_ph() {
16679 let r = _mm512_set_ph(
16680 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16681 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
16682 31.0, 32.0,
16683 );
16684 let e = _mm512_setr_ph(
16685 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
16686 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
16687 3.0, 2.0, 1.0,
16688 );
16689 assert_eq_m512h(r, e);
16690 }
16691
16692 #[simd_test(enable = "avx512fp16")]
16693 unsafe fn test_mm_set_sh() {
16694 let r = _mm_set_sh(1.0);
16695 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
16696 assert_eq_m128h(r, e);
16697 }
16698
16699 #[simd_test(enable = "avx512fp16")]
16700 unsafe fn test_mm_set1_ph() {
16701 let r = _mm_set1_ph(1.0);
16702 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
16703 assert_eq_m128h(r, e);
16704 }
16705
16706 #[simd_test(enable = "avx512fp16")]
16707 unsafe fn test_mm256_set1_ph() {
16708 let r = _mm256_set1_ph(1.0);
16709 let e = _mm256_set_ph(
16710 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
16711 );
16712 assert_eq_m256h(r, e);
16713 }
16714
16715 #[simd_test(enable = "avx512fp16")]
16716 unsafe fn test_mm512_set1_ph() {
16717 let r = _mm512_set1_ph(1.0);
16718 let e = _mm512_set_ph(
16719 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
16720 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
16721 );
16722 assert_eq_m512h(r, e);
16723 }
16724
16725 #[simd_test(enable = "avx512fp16")]
16726 unsafe fn test_mm_setr_ph() {
16727 let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
16728 let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
16729 assert_eq_m128h(r, e);
16730 }
16731
16732 #[simd_test(enable = "avx512fp16")]
16733 unsafe fn test_mm256_setr_ph() {
16734 let r = _mm256_setr_ph(
16735 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16736 );
16737 let e = _mm256_set_ph(
16738 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
16739 );
16740 assert_eq_m256h(r, e);
16741 }
16742
16743 #[simd_test(enable = "avx512fp16")]
16744 unsafe fn test_mm512_setr_ph() {
16745 let r = _mm512_setr_ph(
16746 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16747 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
16748 31.0, 32.0,
16749 );
16750 let e = _mm512_set_ph(
16751 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
16752 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
16753 3.0, 2.0, 1.0,
16754 );
16755 assert_eq_m512h(r, e);
16756 }
16757
16758 #[simd_test(enable = "avx512fp16,avx512vl")]
16759 unsafe fn test_mm_setzero_ph() {
16760 let r = _mm_setzero_ph();
16761 let e = _mm_set1_ph(0.0);
16762 assert_eq_m128h(r, e);
16763 }
16764
16765 #[simd_test(enable = "avx512fp16,avx512vl")]
16766 unsafe fn test_mm256_setzero_ph() {
16767 let r = _mm256_setzero_ph();
16768 let e = _mm256_set1_ph(0.0);
16769 assert_eq_m256h(r, e);
16770 }
16771
16772 #[simd_test(enable = "avx512fp16")]
16773 unsafe fn test_mm512_setzero_ph() {
16774 let r = _mm512_setzero_ph();
16775 let e = _mm512_set1_ph(0.0);
16776 assert_eq_m512h(r, e);
16777 }
16778
16779 #[simd_test(enable = "avx512fp16")]
16780 unsafe fn test_mm_castsi128_ph() {
16781 let a = _mm_set1_epi16(0x3c00);
16782 let r = _mm_castsi128_ph(a);
16783 let e = _mm_set1_ph(1.0);
16784 assert_eq_m128h(r, e);
16785 }
16786
16787 #[simd_test(enable = "avx512fp16")]
16788 unsafe fn test_mm256_castsi256_ph() {
16789 let a = _mm256_set1_epi16(0x3c00);
16790 let r = _mm256_castsi256_ph(a);
16791 let e = _mm256_set1_ph(1.0);
16792 assert_eq_m256h(r, e);
16793 }
16794
16795 #[simd_test(enable = "avx512fp16")]
16796 unsafe fn test_mm512_castsi512_ph() {
16797 let a = _mm512_set1_epi16(0x3c00);
16798 let r = _mm512_castsi512_ph(a);
16799 let e = _mm512_set1_ph(1.0);
16800 assert_eq_m512h(r, e);
16801 }
16802
16803 #[simd_test(enable = "avx512fp16")]
16804 unsafe fn test_mm_castph_si128() {
16805 let a = _mm_set1_ph(1.0);
16806 let r = _mm_castph_si128(a);
16807 let e = _mm_set1_epi16(0x3c00);
16808 assert_eq_m128i(r, e);
16809 }
16810
16811 #[simd_test(enable = "avx512fp16")]
16812 unsafe fn test_mm256_castph_si256() {
16813 let a = _mm256_set1_ph(1.0);
16814 let r = _mm256_castph_si256(a);
16815 let e = _mm256_set1_epi16(0x3c00);
16816 assert_eq_m256i(r, e);
16817 }
16818
16819 #[simd_test(enable = "avx512fp16")]
16820 unsafe fn test_mm512_castph_si512() {
16821 let a = _mm512_set1_ph(1.0);
16822 let r = _mm512_castph_si512(a);
16823 let e = _mm512_set1_epi16(0x3c00);
16824 assert_eq_m512i(r, e);
16825 }
16826
16827 #[simd_test(enable = "avx512fp16")]
16828 unsafe fn test_mm_castps_ph() {
16829 let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
16830 let r = _mm_castps_ph(a);
16831 let e = _mm_set1_ph(1.0);
16832 assert_eq_m128h(r, e);
16833 }
16834
16835 #[simd_test(enable = "avx512fp16")]
16836 unsafe fn test_mm256_castps_ph() {
16837 let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
16838 let r = _mm256_castps_ph(a);
16839 let e = _mm256_set1_ph(1.0);
16840 assert_eq_m256h(r, e);
16841 }
16842
16843 #[simd_test(enable = "avx512fp16")]
16844 unsafe fn test_mm512_castps_ph() {
16845 let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
16846 let r = _mm512_castps_ph(a);
16847 let e = _mm512_set1_ph(1.0);
16848 assert_eq_m512h(r, e);
16849 }
16850
16851 #[simd_test(enable = "avx512fp16")]
16852 unsafe fn test_mm_castph_ps() {
16853 let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
16854 let r = _mm_castph_ps(a);
16855 let e = _mm_set1_ps(1.0);
16856 assert_eq_m128(r, e);
16857 }
16858
16859 #[simd_test(enable = "avx512fp16")]
16860 unsafe fn test_mm256_castph_ps() {
16861 let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
16862 let r = _mm256_castph_ps(a);
16863 let e = _mm256_set1_ps(1.0);
16864 assert_eq_m256(r, e);
16865 }
16866
16867 #[simd_test(enable = "avx512fp16")]
16868 unsafe fn test_mm512_castph_ps() {
16869 let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
16870 let r = _mm512_castph_ps(a);
16871 let e = _mm512_set1_ps(1.0);
16872 assert_eq_m512(r, e);
16873 }
16874
16875 #[simd_test(enable = "avx512fp16")]
16876 unsafe fn test_mm_castpd_ph() {
16877 let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
16878 let r = _mm_castpd_ph(a);
16879 let e = _mm_set1_ph(1.0);
16880 assert_eq_m128h(r, e);
16881 }
16882
16883 #[simd_test(enable = "avx512fp16")]
16884 unsafe fn test_mm256_castpd_ph() {
16885 let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
16886 let r = _mm256_castpd_ph(a);
16887 let e = _mm256_set1_ph(1.0);
16888 assert_eq_m256h(r, e);
16889 }
16890
16891 #[simd_test(enable = "avx512fp16")]
16892 unsafe fn test_mm512_castpd_ph() {
16893 let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
16894 let r = _mm512_castpd_ph(a);
16895 let e = _mm512_set1_ph(1.0);
16896 assert_eq_m512h(r, e);
16897 }
16898
16899 #[simd_test(enable = "avx512fp16")]
16900 unsafe fn test_mm_castph_pd() {
16901 let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
16902 let r = _mm_castph_pd(a);
16903 let e = _mm_set1_pd(1.0);
16904 assert_eq_m128d(r, e);
16905 }
16906
16907 #[simd_test(enable = "avx512fp16")]
16908 unsafe fn test_mm256_castph_pd() {
16909 let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
16910 let r = _mm256_castph_pd(a);
16911 let e = _mm256_set1_pd(1.0);
16912 assert_eq_m256d(r, e);
16913 }
16914
16915 #[simd_test(enable = "avx512fp16")]
16916 unsafe fn test_mm512_castph_pd() {
16917 let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
16918 let r = _mm512_castph_pd(a);
16919 let e = _mm512_set1_pd(1.0);
16920 assert_eq_m512d(r, e);
16921 }
16922
16923 #[simd_test(enable = "avx512fp16")]
16924 unsafe fn test_mm256_castph256_ph128() {
16925 let a = _mm256_setr_ph(
16926 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16927 );
16928 let r = _mm256_castph256_ph128(a);
16929 let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16930 assert_eq_m128h(r, e);
16931 }
16932
16933 #[simd_test(enable = "avx512fp16")]
16934 unsafe fn test_mm512_castph512_ph128() {
16935 let a = _mm512_setr_ph(
16936 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
16937 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
16938 );
16939 let r = _mm512_castph512_ph128(a);
16940 let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16941 assert_eq_m128h(r, e);
16942 }
16943
16944 #[simd_test(enable = "avx512fp16")]
16945 unsafe fn test_mm512_castph512_ph256() {
16946 let a = _mm512_setr_ph(
16947 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
16948 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
16949 );
16950 let r = _mm512_castph512_ph256(a);
16951 let e = _mm256_setr_ph(
16952 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16953 );
16954 assert_eq_m256h(r, e);
16955 }
16956
16957 #[simd_test(enable = "avx512fp16")]
16958 unsafe fn test_mm256_castph128_ph256() {
16959 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16960 let r = _mm256_castph128_ph256(a);
16961 assert_eq_m128h(_mm256_castph256_ph128(r), a);
16962 }
16963
16964 #[simd_test(enable = "avx512fp16")]
16965 unsafe fn test_mm512_castph128_ph512() {
16966 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16967 let r = _mm512_castph128_ph512(a);
16968 assert_eq_m128h(_mm512_castph512_ph128(r), a);
16969 }
16970
16971 #[simd_test(enable = "avx512fp16")]
16972 unsafe fn test_mm512_castph256_ph512() {
16973 let a = _mm256_setr_ph(
16974 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16975 );
16976 let r = _mm512_castph256_ph512(a);
16977 assert_eq_m256h(_mm512_castph512_ph256(r), a);
16978 }
16979
16980 #[simd_test(enable = "avx512fp16")]
16981 unsafe fn test_mm256_zextph128_ph256() {
16982 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16983 let r = _mm256_zextph128_ph256(a);
16984 let e = _mm256_setr_ph(
16985 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
16986 );
16987 assert_eq_m256h(r, e);
16988 }
16989
16990 #[simd_test(enable = "avx512fp16")]
16991 unsafe fn test_mm512_zextph128_ph512() {
16992 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16993 let r = _mm512_zextph128_ph512(a);
16994 let e = _mm512_setr_ph(
16995 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
16996 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
16997 );
16998 assert_eq_m512h(r, e);
16999 }
17000
17001 #[simd_test(enable = "avx512fp16")]
17002 unsafe fn test_mm512_zextph256_ph512() {
17003 let a = _mm256_setr_ph(
17004 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
17005 );
17006 let r = _mm512_zextph256_ph512(a);
17007 let e = _mm512_setr_ph(
17008 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
17009 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
17010 );
17011 assert_eq_m512h(r, e);
17012 }
17013
17014 #[simd_test(enable = "avx512fp16,avx512vl")]
17015 unsafe fn test_mm_cmp_ph_mask() {
17016 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17017 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17018 let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17019 assert_eq!(r, 0b11110000);
17020 }
17021
17022 #[simd_test(enable = "avx512fp16,avx512vl")]
17023 unsafe fn test_mm_mask_cmp_ph_mask() {
17024 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17025 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17026 let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
17027 assert_eq!(r, 0b01010000);
17028 }
17029
17030 #[simd_test(enable = "avx512fp16,avx512vl")]
17031 unsafe fn test_mm256_cmp_ph_mask() {
17032 let a = _mm256_set_ph(
17033 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17034 );
17035 let b = _mm256_set_ph(
17036 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17037 -16.0,
17038 );
17039 let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17040 assert_eq!(r, 0b1111000011110000);
17041 }
17042
17043 #[simd_test(enable = "avx512fp16,avx512vl")]
17044 unsafe fn test_mm256_mask_cmp_ph_mask() {
17045 let a = _mm256_set_ph(
17046 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17047 );
17048 let b = _mm256_set_ph(
17049 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17050 -16.0,
17051 );
17052 let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
17053 assert_eq!(r, 0b0101000001010000);
17054 }
17055
17056 #[simd_test(enable = "avx512fp16")]
17057 unsafe fn test_mm512_cmp_ph_mask() {
17058 let a = _mm512_set_ph(
17059 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17060 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17061 31.0, 32.0,
17062 );
17063 let b = _mm512_set_ph(
17064 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17065 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17066 -29.0, -30.0, -31.0, -32.0,
17067 );
17068 let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17069 assert_eq!(r, 0b11110000111100001111000011110000);
17070 }
17071
17072 #[simd_test(enable = "avx512fp16")]
17073 unsafe fn test_mm512_mask_cmp_ph_mask() {
17074 let a = _mm512_set_ph(
17075 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17076 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17077 31.0, 32.0,
17078 );
17079 let b = _mm512_set_ph(
17080 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17081 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17082 -29.0, -30.0, -31.0, -32.0,
17083 );
17084 let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
17085 assert_eq!(r, 0b01010000010100000101000001010000);
17086 }
17087
17088 #[simd_test(enable = "avx512fp16")]
17089 unsafe fn test_mm512_cmp_round_ph_mask() {
17090 let a = _mm512_set_ph(
17091 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17092 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17093 31.0, 32.0,
17094 );
17095 let b = _mm512_set_ph(
17096 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17097 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17098 -29.0, -30.0, -31.0, -32.0,
17099 );
17100 let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17101 assert_eq!(r, 0b11110000111100001111000011110000);
17102 }
17103
17104 #[simd_test(enable = "avx512fp16")]
17105 unsafe fn test_mm512_mask_cmp_round_ph_mask() {
17106 let a = _mm512_set_ph(
17107 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17108 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17109 31.0, 32.0,
17110 );
17111 let b = _mm512_set_ph(
17112 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17113 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17114 -29.0, -30.0, -31.0, -32.0,
17115 );
17116 let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(
17117 0b01010101010101010101010101010101,
17118 a,
17119 b,
17120 );
17121 assert_eq!(r, 0b01010000010100000101000001010000);
17122 }
17123
17124 #[simd_test(enable = "avx512fp16")]
17125 unsafe fn test_mm_cmp_round_sh_mask() {
17126 let a = _mm_set_sh(1.0);
17127 let b = _mm_set_sh(1.0);
17128 let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17129 assert_eq!(r, 1);
17130 }
17131
17132 #[simd_test(enable = "avx512fp16")]
17133 unsafe fn test_mm_mask_cmp_round_sh_mask() {
17134 let a = _mm_set_sh(1.0);
17135 let b = _mm_set_sh(1.0);
17136 let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
17137 assert_eq!(r, 0);
17138 }
17139
17140 #[simd_test(enable = "avx512fp16")]
17141 unsafe fn test_mm_cmp_sh_mask() {
17142 let a = _mm_set_sh(1.0);
17143 let b = _mm_set_sh(1.0);
17144 let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
17145 assert_eq!(r, 1);
17146 }
17147
17148 #[simd_test(enable = "avx512fp16")]
17149 unsafe fn test_mm_mask_cmp_sh_mask() {
17150 let a = _mm_set_sh(1.0);
17151 let b = _mm_set_sh(1.0);
17152 let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
17153 assert_eq!(r, 0);
17154 }
17155
17156 #[simd_test(enable = "avx512fp16")]
17157 unsafe fn test_mm_comi_round_sh() {
17158 let a = _mm_set_sh(1.0);
17159 let b = _mm_set_sh(1.0);
17160 let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17161 assert_eq!(r, 1);
17162 }
17163
17164 #[simd_test(enable = "avx512fp16")]
17165 unsafe fn test_mm_comi_sh() {
17166 let a = _mm_set_sh(1.0);
17167 let b = _mm_set_sh(1.0);
17168 let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
17169 assert_eq!(r, 1);
17170 }
17171
17172 #[simd_test(enable = "avx512fp16")]
17173 unsafe fn test_mm_comieq_sh() {
17174 let a = _mm_set_sh(1.0);
17175 let b = _mm_set_sh(1.0);
17176 let r = _mm_comieq_sh(a, b);
17177 assert_eq!(r, 1);
17178 }
17179
17180 #[simd_test(enable = "avx512fp16")]
17181 unsafe fn test_mm_comige_sh() {
17182 let a = _mm_set_sh(2.0);
17183 let b = _mm_set_sh(1.0);
17184 let r = _mm_comige_sh(a, b);
17185 assert_eq!(r, 1);
17186 }
17187
17188 #[simd_test(enable = "avx512fp16")]
17189 unsafe fn test_mm_comigt_sh() {
17190 let a = _mm_set_sh(2.0);
17191 let b = _mm_set_sh(1.0);
17192 let r = _mm_comigt_sh(a, b);
17193 assert_eq!(r, 1);
17194 }
17195
17196 #[simd_test(enable = "avx512fp16")]
17197 unsafe fn test_mm_comile_sh() {
17198 let a = _mm_set_sh(1.0);
17199 let b = _mm_set_sh(2.0);
17200 let r = _mm_comile_sh(a, b);
17201 assert_eq!(r, 1);
17202 }
17203
17204 #[simd_test(enable = "avx512fp16")]
17205 unsafe fn test_mm_comilt_sh() {
17206 let a = _mm_set_sh(1.0);
17207 let b = _mm_set_sh(2.0);
17208 let r = _mm_comilt_sh(a, b);
17209 assert_eq!(r, 1);
17210 }
17211
17212 #[simd_test(enable = "avx512fp16")]
17213 unsafe fn test_mm_comineq_sh() {
17214 let a = _mm_set_sh(1.0);
17215 let b = _mm_set_sh(2.0);
17216 let r = _mm_comineq_sh(a, b);
17217 assert_eq!(r, 1);
17218 }
17219
17220 #[simd_test(enable = "avx512fp16")]
17221 unsafe fn test_mm_ucomieq_sh() {
17222 let a = _mm_set_sh(1.0);
17223 let b = _mm_set_sh(1.0);
17224 let r = _mm_ucomieq_sh(a, b);
17225 assert_eq!(r, 1);
17226 }
17227
17228 #[simd_test(enable = "avx512fp16")]
17229 unsafe fn test_mm_ucomige_sh() {
17230 let a = _mm_set_sh(2.0);
17231 let b = _mm_set_sh(1.0);
17232 let r = _mm_ucomige_sh(a, b);
17233 assert_eq!(r, 1);
17234 }
17235
17236 #[simd_test(enable = "avx512fp16")]
17237 unsafe fn test_mm_ucomigt_sh() {
17238 let a = _mm_set_sh(2.0);
17239 let b = _mm_set_sh(1.0);
17240 let r = _mm_ucomigt_sh(a, b);
17241 assert_eq!(r, 1);
17242 }
17243
17244 #[simd_test(enable = "avx512fp16")]
17245 unsafe fn test_mm_ucomile_sh() {
17246 let a = _mm_set_sh(1.0);
17247 let b = _mm_set_sh(2.0);
17248 let r = _mm_ucomile_sh(a, b);
17249 assert_eq!(r, 1);
17250 }
17251
17252 #[simd_test(enable = "avx512fp16")]
17253 unsafe fn test_mm_ucomilt_sh() {
17254 let a = _mm_set_sh(1.0);
17255 let b = _mm_set_sh(2.0);
17256 let r = _mm_ucomilt_sh(a, b);
17257 assert_eq!(r, 1);
17258 }
17259
17260 #[simd_test(enable = "avx512fp16")]
17261 unsafe fn test_mm_ucomineq_sh() {
17262 let a = _mm_set_sh(1.0);
17263 let b = _mm_set_sh(2.0);
17264 let r = _mm_ucomineq_sh(a, b);
17265 assert_eq!(r, 1);
17266 }
17267
17268 #[simd_test(enable = "avx512fp16,avx512vl")]
17269 unsafe fn test_mm_load_ph() {
17270 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17271 let b = _mm_load_ph(addr_of!(a).cast());
17272 assert_eq_m128h(a, b);
17273 }
17274
17275 #[simd_test(enable = "avx512fp16,avx512vl")]
17276 unsafe fn test_mm256_load_ph() {
17277 let a = _mm256_set_ph(
17278 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17279 );
17280 let b = _mm256_load_ph(addr_of!(a).cast());
17281 assert_eq_m256h(a, b);
17282 }
17283
17284 #[simd_test(enable = "avx512fp16")]
17285 unsafe fn test_mm512_load_ph() {
17286 let a = _mm512_set_ph(
17287 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17288 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17289 31.0, 32.0,
17290 );
17291 let b = _mm512_load_ph(addr_of!(a).cast());
17292 assert_eq_m512h(a, b);
17293 }
17294
17295 #[simd_test(enable = "avx512fp16")]
17296 unsafe fn test_mm_load_sh() {
17297 let a = _mm_set_sh(1.0);
17298 let b = _mm_load_sh(addr_of!(a).cast());
17299 assert_eq_m128h(a, b);
17300 }
17301
17302 #[simd_test(enable = "avx512fp16")]
17303 unsafe fn test_mm_mask_load_sh() {
17304 let a = _mm_set_sh(1.0);
17305 let src = _mm_set_sh(2.);
17306 let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
17307 assert_eq_m128h(a, b);
17308 let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
17309 assert_eq_m128h(src, b);
17310 }
17311
17312 #[simd_test(enable = "avx512fp16")]
17313 unsafe fn test_mm_maskz_load_sh() {
17314 let a = _mm_set_sh(1.0);
17315 let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
17316 assert_eq_m128h(a, b);
17317 let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
17318 assert_eq_m128h(_mm_setzero_ph(), b);
17319 }
17320
17321 #[simd_test(enable = "avx512fp16,avx512vl")]
17322 unsafe fn test_mm_loadu_ph() {
17323 let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
17324 let r = _mm_loadu_ph(array.as_ptr());
17325 let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17326 assert_eq_m128h(r, e);
17327 }
17328
17329 #[simd_test(enable = "avx512fp16,avx512vl")]
17330 unsafe fn test_mm256_loadu_ph() {
17331 let array = [
17332 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17333 ];
17334 let r = _mm256_loadu_ph(array.as_ptr());
17335 let e = _mm256_setr_ph(
17336 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17337 );
17338 assert_eq_m256h(r, e);
17339 }
17340
17341 #[simd_test(enable = "avx512fp16")]
17342 unsafe fn test_mm512_loadu_ph() {
17343 let array = [
17344 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17345 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17346 31.0, 32.0,
17347 ];
17348 let r = _mm512_loadu_ph(array.as_ptr());
17349 let e = _mm512_setr_ph(
17350 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17351 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17352 31.0, 32.0,
17353 );
17354 assert_eq_m512h(r, e);
17355 }
17356
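// _mm_move_sh returns a vector whose lane 0 comes from b and whose lanes 1..=7 come
// from a; the mask/maskz forms pick lane 0 from b, src, or zero depending on mask bit 0.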
17357 #[simd_test(enable = "avx512fp16")]
17358 unsafe fn test_mm_move_sh() {
17359 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17360 let b = _mm_set_sh(9.0);
17361 let r = _mm_move_sh(a, b);
17362 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
17363 assert_eq_m128h(r, e);
17364 }
17365
17366 #[simd_test(enable = "avx512fp16")]
17367 unsafe fn test_mm_mask_move_sh() {
17368 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17369 let b = _mm_set_sh(9.0);
17370 let src = _mm_set_sh(10.0);
17371 let r = _mm_mask_move_sh(src, 0, a, b);
17372 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
17373 assert_eq_m128h(r, e);
17374 }
17375
17376 #[simd_test(enable = "avx512fp16")]
17377 unsafe fn test_mm_maskz_move_sh() {
17378 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17379 let b = _mm_set_sh(9.0);
17380 let r = _mm_maskz_move_sh(0, a, b);
17381 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
17382 assert_eq_m128h(r, e);
17383 }
17384
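// Stores mirror the loads: *_store_ph needs a destination aligned to the vector width
// (writing into another vector value guarantees this), while *_storeu_ph further below
// can write straight into an f16 array.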
17385 #[simd_test(enable = "avx512fp16,avx512vl")]
17386 unsafe fn test_mm_store_ph() {
17387 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17388 let mut b = _mm_setzero_ph();
17389 _mm_store_ph(addr_of_mut!(b).cast(), a);
17390 assert_eq_m128h(a, b);
17391 }
17392
17393 #[simd_test(enable = "avx512fp16,avx512vl")]
17394 unsafe fn test_mm256_store_ph() {
17395 let a = _mm256_set_ph(
17396 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17397 );
17398 let mut b = _mm256_setzero_ph();
17399 _mm256_store_ph(addr_of_mut!(b).cast(), a);
17400 assert_eq_m256h(a, b);
17401 }
17402
17403 #[simd_test(enable = "avx512fp16")]
17404 unsafe fn test_mm512_store_ph() {
17405 let a = _mm512_set_ph(
17406 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17407 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17408 31.0, 32.0,
17409 );
17410 let mut b = _mm512_setzero_ph();
17411 _mm512_store_ph(addr_of_mut!(b).cast(), a);
17412 assert_eq_m512h(a, b);
17413 }
17414
17415 #[simd_test(enable = "avx512fp16")]
17416 unsafe fn test_mm_store_sh() {
17417 let a = _mm_set_sh(1.0);
17418 let mut b = _mm_setzero_ph();
17419 _mm_store_sh(addr_of_mut!(b).cast(), a);
17420 assert_eq_m128h(a, b);
17421 }
17422
17423 #[simd_test(enable = "avx512fp16")]
17424 unsafe fn test_mm_mask_store_sh() {
17425 let a = _mm_set_sh(1.0);
17426 let mut b = _mm_setzero_ph();
17427 _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
17428 assert_eq_m128h(_mm_setzero_ph(), b);
17429 _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
17430 assert_eq_m128h(a, b);
17431 }
17432
17433 #[simd_test(enable = "avx512fp16,avx512vl")]
17434 unsafe fn test_mm_storeu_ph() {
17435 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17436 let mut array = [0.0; 8];
17437 _mm_storeu_ph(array.as_mut_ptr(), a);
17438 assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
17439 }
17440
17441 #[simd_test(enable = "avx512fp16,avx512vl")]
17442 unsafe fn test_mm256_storeu_ph() {
17443 let a = _mm256_set_ph(
17444 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17445 );
17446 let mut array = [0.0; 16];
17447 _mm256_storeu_ph(array.as_mut_ptr(), a);
17448 assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
17449 }
17450
17451 #[simd_test(enable = "avx512fp16")]
17452 unsafe fn test_mm512_storeu_ph() {
17453 let a = _mm512_set_ph(
17454 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17455 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17456 31.0, 32.0,
17457 );
17458 let mut array = [0.0; 32];
17459 _mm512_storeu_ph(array.as_mut_ptr(), a);
17460 assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
17461 }
17462
17463 #[simd_test(enable = "avx512fp16,avx512vl")]
17464 unsafe fn test_mm_add_ph() {
17465 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17466 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17467 let r = _mm_add_ph(a, b);
17468 let e = _mm_set1_ph(9.0);
17469 assert_eq_m128h(r, e);
17470 }
17471
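// Mask convention for the mask_/maskz_ tests: bit i of the mask selects lane i.
// _mm_set_ph takes its arguments from the highest lane down to lane 0, so mask bit 0
// corresponds to the last argument. Lanes whose bit is clear keep the value from src
// (mask_ forms) or are zeroed (maskz_ forms).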
17472 #[simd_test(enable = "avx512fp16,avx512vl")]
17473 unsafe fn test_mm_mask_add_ph() {
17474 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17475 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17476 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17477 let r = _mm_mask_add_ph(src, 0b01010101, a, b);
17478 let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
17479 assert_eq_m128h(r, e);
17480 }
17481
17482 #[simd_test(enable = "avx512fp16,avx512vl")]
17483 unsafe fn test_mm_maskz_add_ph() {
17484 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17485 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17486 let r = _mm_maskz_add_ph(0b01010101, a, b);
17487 let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
17488 assert_eq_m128h(r, e);
17489 }
17490
17491 #[simd_test(enable = "avx512fp16,avx512vl")]
17492 unsafe fn test_mm256_add_ph() {
17493 let a = _mm256_set_ph(
17494 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17495 );
17496 let b = _mm256_set_ph(
17497 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17498 );
17499 let r = _mm256_add_ph(a, b);
17500 let e = _mm256_set1_ph(17.0);
17501 assert_eq_m256h(r, e);
17502 }
17503
17504 #[simd_test(enable = "avx512fp16,avx512vl")]
17505 unsafe fn test_mm256_mask_add_ph() {
17506 let a = _mm256_set_ph(
17507 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17508 );
17509 let b = _mm256_set_ph(
17510 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17511 );
17512 let src = _mm256_set_ph(
17513 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
17514 );
17515 let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
17516 let e = _mm256_set_ph(
17517 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
17518 );
17519 assert_eq_m256h(r, e);
17520 }
17521
17522 #[simd_test(enable = "avx512fp16,avx512vl")]
17523 unsafe fn test_mm256_maskz_add_ph() {
17524 let a = _mm256_set_ph(
17525 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17526 );
17527 let b = _mm256_set_ph(
17528 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17529 );
17530 let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
17531 let e = _mm256_set_ph(
17532 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
17533 );
17534 assert_eq_m256h(r, e);
17535 }
17536
17537 #[simd_test(enable = "avx512fp16")]
17538 unsafe fn test_mm512_add_ph() {
17539 let a = _mm512_set_ph(
17540 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17541 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17542 31.0, 32.0,
17543 );
17544 let b = _mm512_set_ph(
17545 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17546 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17547 3.0, 2.0, 1.0,
17548 );
17549 let r = _mm512_add_ph(a, b);
17550 let e = _mm512_set1_ph(33.0);
17551 assert_eq_m512h(r, e);
17552 }
17553
17554 #[simd_test(enable = "avx512fp16")]
17555 unsafe fn test_mm512_mask_add_ph() {
17556 let a = _mm512_set_ph(
17557 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17558 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17559 31.0, 32.0,
17560 );
17561 let b = _mm512_set_ph(
17562 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17563 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17564 3.0, 2.0, 1.0,
17565 );
17566 let src = _mm512_set_ph(
17567 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17568 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17569 );
17570 let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
17571 let e = _mm512_set_ph(
17572 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
17573 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
17574 );
17575 assert_eq_m512h(r, e);
17576 }
17577
17578 #[simd_test(enable = "avx512fp16")]
17579 unsafe fn test_mm512_maskz_add_ph() {
17580 let a = _mm512_set_ph(
17581 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17582 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17583 31.0, 32.0,
17584 );
17585 let b = _mm512_set_ph(
17586 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17587 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17588 3.0, 2.0, 1.0,
17589 );
17590 let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
17591 let e = _mm512_set_ph(
17592 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
17593 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
17594 );
17595 assert_eq_m512h(r, e);
17596 }
17597
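// The *_round_* variants take a rounding-control const parameter:
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even and
// suppresses exceptions (SAE), so the expected values match the non-round forms.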
17598 #[simd_test(enable = "avx512fp16")]
17599 unsafe fn test_mm512_add_round_ph() {
17600 let a = _mm512_set_ph(
17601 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17602 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17603 31.0, 32.0,
17604 );
17605 let b = _mm512_set_ph(
17606 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17607 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17608 3.0, 2.0, 1.0,
17609 );
17610 let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17611 let e = _mm512_set1_ph(33.0);
17612 assert_eq_m512h(r, e);
17613 }
17614
17615 #[simd_test(enable = "avx512fp16")]
17616 unsafe fn test_mm512_mask_add_round_ph() {
17617 let a = _mm512_set_ph(
17618 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17619 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17620 31.0, 32.0,
17621 );
17622 let b = _mm512_set_ph(
17623 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17624 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17625 3.0, 2.0, 1.0,
17626 );
17627 let src = _mm512_set_ph(
17628 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17629 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17630 );
17631 let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17632 src,
17633 0b01010101010101010101010101010101,
17634 a,
17635 b,
17636 );
17637 let e = _mm512_set_ph(
17638 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
17639 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
17640 );
17641 assert_eq_m512h(r, e);
17642 }
17643
17644 #[simd_test(enable = "avx512fp16")]
17645 unsafe fn test_mm512_maskz_add_round_ph() {
17646 let a = _mm512_set_ph(
17647 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17648 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17649 31.0, 32.0,
17650 );
17651 let b = _mm512_set_ph(
17652 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17653 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17654 3.0, 2.0, 1.0,
17655 );
17656 let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17657 0b01010101010101010101010101010101,
17658 a,
17659 b,
17660 );
17661 let e = _mm512_set_ph(
17662 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
17663 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
17664 );
17665 assert_eq_m512h(r, e);
17666 }
17667
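// Scalar *_sh operations use only lane 0 of a and b; lanes 1..=7 of the result are
// copied from a. Since _mm_set_sh zeroes the upper lanes, comparing whole vectors
// built with _mm_set_sh checks both the scalar result and the copied upper lanes.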
17668 #[simd_test(enable = "avx512fp16")]
17669 unsafe fn test_mm_add_round_sh() {
17670 let a = _mm_set_sh(1.0);
17671 let b = _mm_set_sh(2.0);
17672 let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17673 let e = _mm_set_sh(3.0);
17674 assert_eq_m128h(r, e);
17675 }
17676
17677 #[simd_test(enable = "avx512fp16")]
17678 unsafe fn test_mm_mask_add_round_sh() {
17679 let a = _mm_set_sh(1.0);
17680 let b = _mm_set_sh(2.0);
17681 let src = _mm_set_sh(4.0);
17682 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17683 src, 0, a, b,
17684 );
17685 let e = _mm_set_sh(4.0);
17686 assert_eq_m128h(r, e);
17687 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17688 src, 1, a, b,
17689 );
17690 let e = _mm_set_sh(3.0);
17691 assert_eq_m128h(r, e);
17692 }
17693
17694 #[simd_test(enable = "avx512fp16")]
17695 unsafe fn test_mm_maskz_add_round_sh() {
17696 let a = _mm_set_sh(1.0);
17697 let b = _mm_set_sh(2.0);
17698 let r =
17699 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
17700 let e = _mm_set_sh(0.0);
17701 assert_eq_m128h(r, e);
17702 let r =
17703 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
17704 let e = _mm_set_sh(3.0);
17705 assert_eq_m128h(r, e);
17706 }
17707
17708 #[simd_test(enable = "avx512fp16")]
17709 unsafe fn test_mm_add_sh() {
17710 let a = _mm_set_sh(1.0);
17711 let b = _mm_set_sh(2.0);
17712 let r = _mm_add_sh(a, b);
17713 let e = _mm_set_sh(3.0);
17714 assert_eq_m128h(r, e);
17715 }
17716
17717 #[simd_test(enable = "avx512fp16")]
17718 unsafe fn test_mm_mask_add_sh() {
17719 let a = _mm_set_sh(1.0);
17720 let b = _mm_set_sh(2.0);
17721 let src = _mm_set_sh(4.0);
17722 let r = _mm_mask_add_sh(src, 0, a, b);
17723 let e = _mm_set_sh(4.0);
17724 assert_eq_m128h(r, e);
17725 let r = _mm_mask_add_sh(src, 1, a, b);
17726 let e = _mm_set_sh(3.0);
17727 assert_eq_m128h(r, e);
17728 }
17729
17730 #[simd_test(enable = "avx512fp16")]
17731 unsafe fn test_mm_maskz_add_sh() {
17732 let a = _mm_set_sh(1.0);
17733 let b = _mm_set_sh(2.0);
17734 let r = _mm_maskz_add_sh(0, a, b);
17735 let e = _mm_set_sh(0.0);
17736 assert_eq_m128h(r, e);
17737 let r = _mm_maskz_add_sh(1, a, b);
17738 let e = _mm_set_sh(3.0);
17739 assert_eq_m128h(r, e);
17740 }
17741
17742 #[simd_test(enable = "avx512fp16,avx512vl")]
17743 unsafe fn test_mm_sub_ph() {
17744 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17745 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17746 let r = _mm_sub_ph(a, b);
17747 let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
17748 assert_eq_m128h(r, e);
17749 }
17750
17751 #[simd_test(enable = "avx512fp16,avx512vl")]
17752 unsafe fn test_mm_mask_sub_ph() {
17753 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17754 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17755 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17756 let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
17757 let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
17758 assert_eq_m128h(r, e);
17759 }
17760
17761 #[simd_test(enable = "avx512fp16,avx512vl")]
17762 unsafe fn test_mm_maskz_sub_ph() {
17763 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17764 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17765 let r = _mm_maskz_sub_ph(0b01010101, a, b);
17766 let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
17767 assert_eq_m128h(r, e);
17768 }
17769
17770 #[simd_test(enable = "avx512fp16,avx512vl")]
17771 unsafe fn test_mm256_sub_ph() {
17772 let a = _mm256_set_ph(
17773 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17774 );
17775 let b = _mm256_set_ph(
17776 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17777 );
17778 let r = _mm256_sub_ph(a, b);
17779 let e = _mm256_set_ph(
17780 -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
17781 15.0,
17782 );
17783 assert_eq_m256h(r, e);
17784 }
17785
17786 #[simd_test(enable = "avx512fp16,avx512vl")]
17787 unsafe fn test_mm256_mask_sub_ph() {
17788 let a = _mm256_set_ph(
17789 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17790 );
17791 let b = _mm256_set_ph(
17792 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17793 );
17794 let src = _mm256_set_ph(
17795 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
17796 );
17797 let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
17798 let e = _mm256_set_ph(
17799 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
17800 );
17801 assert_eq_m256h(r, e);
17802 }
17803
17804 #[simd_test(enable = "avx512fp16,avx512vl")]
17805 unsafe fn test_mm256_maskz_sub_ph() {
17806 let a = _mm256_set_ph(
17807 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17808 );
17809 let b = _mm256_set_ph(
17810 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17811 );
17812 let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
17813 let e = _mm256_set_ph(
17814 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
17815 );
17816 assert_eq_m256h(r, e);
17817 }
17818
17819 #[simd_test(enable = "avx512fp16")]
17820 unsafe fn test_mm512_sub_ph() {
17821 let a = _mm512_set_ph(
17822 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17823 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17824 31.0, 32.0,
17825 );
17826 let b = _mm512_set_ph(
17827 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17828 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17829 3.0, 2.0, 1.0,
17830 );
17831 let r = _mm512_sub_ph(a, b);
17832 let e = _mm512_set_ph(
17833 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
17834 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
17835 23.0, 25.0, 27.0, 29.0, 31.0,
17836 );
17837 assert_eq_m512h(r, e);
17838 }
17839
17840 #[simd_test(enable = "avx512fp16")]
17841 unsafe fn test_mm512_mask_sub_ph() {
17842 let a = _mm512_set_ph(
17843 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17844 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17845 31.0, 32.0,
17846 );
17847 let b = _mm512_set_ph(
17848 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17849 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17850 3.0, 2.0, 1.0,
17851 );
17852 let src = _mm512_set_ph(
17853 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17854 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17855 );
17856 let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
17857 let e = _mm512_set_ph(
17858 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
17859 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
17860 );
17861 assert_eq_m512h(r, e);
17862 }
17863
17864 #[simd_test(enable = "avx512fp16")]
17865 unsafe fn test_mm512_maskz_sub_ph() {
17866 let a = _mm512_set_ph(
17867 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17868 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17869 31.0, 32.0,
17870 );
17871 let b = _mm512_set_ph(
17872 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17873 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17874 3.0, 2.0, 1.0,
17875 );
17876 let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
17877 let e = _mm512_set_ph(
17878 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
17879 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
17880 );
17881 assert_eq_m512h(r, e);
17882 }
17883
17884 #[simd_test(enable = "avx512fp16")]
17885 unsafe fn test_mm512_sub_round_ph() {
17886 let a = _mm512_set_ph(
17887 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17888 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17889 31.0, 32.0,
17890 );
17891 let b = _mm512_set_ph(
17892 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17893 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17894 3.0, 2.0, 1.0,
17895 );
17896 let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17897 let e = _mm512_set_ph(
17898 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
17899 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
17900 23.0, 25.0, 27.0, 29.0, 31.0,
17901 );
17902 assert_eq_m512h(r, e);
17903 }
17904
17905 #[simd_test(enable = "avx512fp16")]
17906 unsafe fn test_mm512_mask_sub_round_ph() {
17907 let a = _mm512_set_ph(
17908 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17909 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17910 31.0, 32.0,
17911 );
17912 let b = _mm512_set_ph(
17913 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17914 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17915 3.0, 2.0, 1.0,
17916 );
17917 let src = _mm512_set_ph(
17918 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17919 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17920 );
17921 let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17922 src,
17923 0b01010101010101010101010101010101,
17924 a,
17925 b,
17926 );
17927 let e = _mm512_set_ph(
17928 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
17929 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
17930 );
17931 assert_eq_m512h(r, e);
17932 }
17933
17934 #[simd_test(enable = "avx512fp16")]
17935 unsafe fn test_mm512_maskz_sub_round_ph() {
17936 let a = _mm512_set_ph(
17937 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17938 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17939 31.0, 32.0,
17940 );
17941 let b = _mm512_set_ph(
17942 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17943 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17944 3.0, 2.0, 1.0,
17945 );
17946 let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17947 0b01010101010101010101010101010101,
17948 a,
17949 b,
17950 );
17951 let e = _mm512_set_ph(
17952 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
17953 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
17954 );
17955 assert_eq_m512h(r, e);
17956 }
17957
17958 #[simd_test(enable = "avx512fp16")]
17959 unsafe fn test_mm_sub_round_sh() {
17960 let a = _mm_set_sh(1.0);
17961 let b = _mm_set_sh(2.0);
17962 let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17963 let e = _mm_set_sh(-1.0);
17964 assert_eq_m128h(r, e);
17965 }
17966
17967 #[simd_test(enable = "avx512fp16")]
17968 unsafe fn test_mm_mask_sub_round_sh() {
17969 let a = _mm_set_sh(1.0);
17970 let b = _mm_set_sh(2.0);
17971 let src = _mm_set_sh(4.0);
17972 let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17973 src, 0, a, b,
17974 );
17975 let e = _mm_set_sh(4.0);
17976 assert_eq_m128h(r, e);
17977 let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17978 src, 1, a, b,
17979 );
17980 let e = _mm_set_sh(-1.0);
17981 assert_eq_m128h(r, e);
17982 }
17983
17984 #[simd_test(enable = "avx512fp16")]
17985 unsafe fn test_mm_maskz_sub_round_sh() {
17986 let a = _mm_set_sh(1.0);
17987 let b = _mm_set_sh(2.0);
17988 let r =
17989 _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
17990 let e = _mm_set_sh(0.0);
17991 assert_eq_m128h(r, e);
17992 let r =
17993 _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
17994 let e = _mm_set_sh(-1.0);
17995 assert_eq_m128h(r, e);
17996 }
17997
17998 #[simd_test(enable = "avx512fp16")]
17999 unsafe fn test_mm_sub_sh() {
18000 let a = _mm_set_sh(1.0);
18001 let b = _mm_set_sh(2.0);
18002 let r = _mm_sub_sh(a, b);
18003 let e = _mm_set_sh(-1.0);
18004 assert_eq_m128h(r, e);
18005 }
18006
18007 #[simd_test(enable = "avx512fp16")]
18008 unsafe fn test_mm_mask_sub_sh() {
18009 let a = _mm_set_sh(1.0);
18010 let b = _mm_set_sh(2.0);
18011 let src = _mm_set_sh(4.0);
18012 let r = _mm_mask_sub_sh(src, 0, a, b);
18013 let e = _mm_set_sh(4.0);
18014 assert_eq_m128h(r, e);
18015 let r = _mm_mask_sub_sh(src, 1, a, b);
18016 let e = _mm_set_sh(-1.0);
18017 assert_eq_m128h(r, e);
18018 }
18019
18020 #[simd_test(enable = "avx512fp16")]
18021 unsafe fn test_mm_maskz_sub_sh() {
18022 let a = _mm_set_sh(1.0);
18023 let b = _mm_set_sh(2.0);
18024 let r = _mm_maskz_sub_sh(0, a, b);
18025 let e = _mm_set_sh(0.0);
18026 assert_eq_m128h(r, e);
18027 let r = _mm_maskz_sub_sh(1, a, b);
18028 let e = _mm_set_sh(-1.0);
18029 assert_eq_m128h(r, e);
18030 }
18031
18032 #[simd_test(enable = "avx512fp16,avx512vl")]
18033 unsafe fn test_mm_mul_ph() {
18034 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18035 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18036 let r = _mm_mul_ph(a, b);
18037 let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
18038 assert_eq_m128h(r, e);
18039 }
18040
18041 #[simd_test(enable = "avx512fp16,avx512vl")]
18042 unsafe fn test_mm_mask_mul_ph() {
18043 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18044 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18045 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
18046 let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
18047 let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
18048 assert_eq_m128h(r, e);
18049 }
18050
18051 #[simd_test(enable = "avx512fp16,avx512vl")]
18052 unsafe fn test_mm_maskz_mul_ph() {
18053 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18054 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18055 let r = _mm_maskz_mul_ph(0b01010101, a, b);
18056 let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
18057 assert_eq_m128h(r, e);
18058 }
18059
18060 #[simd_test(enable = "avx512fp16,avx512vl")]
18061 unsafe fn test_mm256_mul_ph() {
18062 let a = _mm256_set_ph(
18063 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18064 );
18065 let b = _mm256_set_ph(
18066 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18067 );
18068 let r = _mm256_mul_ph(a, b);
18069 let e = _mm256_set_ph(
18070 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
18071 30.0, 16.0,
18072 );
18073 assert_eq_m256h(r, e);
18074 }
18075
18076 #[simd_test(enable = "avx512fp16,avx512vl")]
18077 unsafe fn test_mm256_mask_mul_ph() {
18078 let a = _mm256_set_ph(
18079 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18080 );
18081 let b = _mm256_set_ph(
18082 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18083 );
18084 let src = _mm256_set_ph(
18085 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
18086 );
18087 let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
18088 let e = _mm256_set_ph(
18089 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
18090 );
18091 assert_eq_m256h(r, e);
18092 }
18093
18094 #[simd_test(enable = "avx512fp16,avx512vl")]
18095 unsafe fn test_mm256_maskz_mul_ph() {
18096 let a = _mm256_set_ph(
18097 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18098 );
18099 let b = _mm256_set_ph(
18100 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18101 );
18102 let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
18103 let e = _mm256_set_ph(
18104 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
18105 );
18106 assert_eq_m256h(r, e);
18107 }
18108
18109 #[simd_test(enable = "avx512fp16")]
18110 unsafe fn test_mm512_mul_ph() {
18111 let a = _mm512_set_ph(
18112 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18113 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18114 31.0, 32.0,
18115 );
18116 let b = _mm512_set_ph(
18117 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18118 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18119 3.0, 2.0, 1.0,
18120 );
18121 let r = _mm512_mul_ph(a, b);
18122 let e = _mm512_set_ph(
18123 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
18124 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
18125 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
18126 );
18127 assert_eq_m512h(r, e);
18128 }
18129
18130 #[simd_test(enable = "avx512fp16")]
18131 unsafe fn test_mm512_mask_mul_ph() {
18132 let a = _mm512_set_ph(
18133 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18134 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18135 31.0, 32.0,
18136 );
18137 let b = _mm512_set_ph(
18138 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18139 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18140 3.0, 2.0, 1.0,
18141 );
18142 let src = _mm512_set_ph(
18143 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18144 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18145 );
18146 let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
18147 let e = _mm512_set_ph(
18148 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
18149 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
18150 );
18151 assert_eq_m512h(r, e);
18152 }
18153
18154 #[simd_test(enable = "avx512fp16")]
18155 unsafe fn test_mm512_maskz_mul_ph() {
18156 let a = _mm512_set_ph(
18157 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18158 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18159 31.0, 32.0,
18160 );
18161 let b = _mm512_set_ph(
18162 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18163 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18164 3.0, 2.0, 1.0,
18165 );
18166 let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
18167 let e = _mm512_set_ph(
18168 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
18169 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
18170 );
18171 assert_eq_m512h(r, e);
18172 }
18173
18174 #[simd_test(enable = "avx512fp16")]
18175 unsafe fn test_mm512_mul_round_ph() {
18176 let a = _mm512_set_ph(
18177 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18178 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18179 31.0, 32.0,
18180 );
18181 let b = _mm512_set_ph(
18182 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18183 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18184 3.0, 2.0, 1.0,
18185 );
18186 let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18187 let e = _mm512_set_ph(
18188 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
18189 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
18190 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
18191 );
18192 assert_eq_m512h(r, e);
18193 }
18194
18195 #[simd_test(enable = "avx512fp16")]
18196 unsafe fn test_mm512_mask_mul_round_ph() {
18197 let a = _mm512_set_ph(
18198 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18199 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18200 31.0, 32.0,
18201 );
18202 let b = _mm512_set_ph(
18203 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18204 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18205 3.0, 2.0, 1.0,
18206 );
18207 let src = _mm512_set_ph(
18208 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18209 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18210 );
18211 let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18212 src,
18213 0b01010101010101010101010101010101,
18214 a,
18215 b,
18216 );
18217 let e = _mm512_set_ph(
18218 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
18219 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
18220 );
18221 assert_eq_m512h(r, e);
18222 }
18223
18224 #[simd_test(enable = "avx512fp16")]
18225 unsafe fn test_mm512_maskz_mul_round_ph() {
18226 let a = _mm512_set_ph(
18227 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18228 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18229 31.0, 32.0,
18230 );
18231 let b = _mm512_set_ph(
18232 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18233 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18234 3.0, 2.0, 1.0,
18235 );
18236 let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18237 0b01010101010101010101010101010101,
18238 a,
18239 b,
18240 );
18241 let e = _mm512_set_ph(
18242 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
18243 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
18244 );
18245 assert_eq_m512h(r, e);
18246 }
18247
18248 #[simd_test(enable = "avx512fp16")]
18249 unsafe fn test_mm_mul_round_sh() {
18250 let a = _mm_set_sh(1.0);
18251 let b = _mm_set_sh(2.0);
18252 let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18253 let e = _mm_set_sh(2.0);
18254 assert_eq_m128h(r, e);
18255 }
18256
18257 #[simd_test(enable = "avx512fp16")]
18258 unsafe fn test_mm_mask_mul_round_sh() {
18259 let a = _mm_set_sh(1.0);
18260 let b = _mm_set_sh(2.0);
18261 let src = _mm_set_sh(4.0);
18262 let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18263 src, 0, a, b,
18264 );
18265 let e = _mm_set_sh(4.0);
18266 assert_eq_m128h(r, e);
18267 let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18268 src, 1, a, b,
18269 );
18270 let e = _mm_set_sh(2.0);
18271 assert_eq_m128h(r, e);
18272 }
18273
18274 #[simd_test(enable = "avx512fp16")]
18275 unsafe fn test_mm_maskz_mul_round_sh() {
18276 let a = _mm_set_sh(1.0);
18277 let b = _mm_set_sh(2.0);
18278 let r =
18279 _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18280 let e = _mm_set_sh(0.0);
18281 assert_eq_m128h(r, e);
18282 let r =
18283 _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18284 let e = _mm_set_sh(2.0);
18285 assert_eq_m128h(r, e);
18286 }
18287
18288 #[simd_test(enable = "avx512fp16")]
18289 unsafe fn test_mm_mul_sh() {
18290 let a = _mm_set_sh(1.0);
18291 let b = _mm_set_sh(2.0);
18292 let r = _mm_mul_sh(a, b);
18293 let e = _mm_set_sh(2.0);
18294 assert_eq_m128h(r, e);
18295 }
18296
18297 #[simd_test(enable = "avx512fp16")]
18298 unsafe fn test_mm_mask_mul_sh() {
18299 let a = _mm_set_sh(1.0);
18300 let b = _mm_set_sh(2.0);
18301 let src = _mm_set_sh(4.0);
18302 let r = _mm_mask_mul_sh(src, 0, a, b);
18303 let e = _mm_set_sh(4.0);
18304 assert_eq_m128h(r, e);
18305 let r = _mm_mask_mul_sh(src, 1, a, b);
18306 let e = _mm_set_sh(2.0);
18307 assert_eq_m128h(r, e);
18308 }
18309
18310 #[simd_test(enable = "avx512fp16")]
18311 unsafe fn test_mm_maskz_mul_sh() {
18312 let a = _mm_set_sh(1.0);
18313 let b = _mm_set_sh(2.0);
18314 let r = _mm_maskz_mul_sh(0, a, b);
18315 let e = _mm_set_sh(0.0);
18316 assert_eq_m128h(r, e);
18317 let r = _mm_maskz_mul_sh(1, a, b);
18318 let e = _mm_set_sh(2.0);
18319 assert_eq_m128h(r, e);
18320 }
18321
18322 #[simd_test(enable = "avx512fp16,avx512vl")]
18323 unsafe fn test_mm_div_ph() {
18324 let a = _mm_set1_ph(1.0);
18325 let b = _mm_set1_ph(2.0);
18326 let r = _mm_div_ph(a, b);
18327 let e = _mm_set1_ph(0.5);
18328 assert_eq_m128h(r, e);
18329 }
18330
18331 #[simd_test(enable = "avx512fp16,avx512vl")]
18332 unsafe fn test_mm_mask_div_ph() {
18333 let a = _mm_set1_ph(1.0);
18334 let b = _mm_set1_ph(2.0);
18335 let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
18336 let r = _mm_mask_div_ph(src, 0b01010101, a, b);
18337 let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
18338 assert_eq_m128h(r, e);
18339 }
18340
18341 #[simd_test(enable = "avx512fp16,avx512vl")]
18342 unsafe fn test_mm_maskz_div_ph() {
18343 let a = _mm_set1_ph(1.0);
18344 let b = _mm_set1_ph(2.0);
18345 let r = _mm_maskz_div_ph(0b01010101, a, b);
18346 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
18347 assert_eq_m128h(r, e);
18348 }
18349
18350 #[simd_test(enable = "avx512fp16,avx512vl")]
18351 unsafe fn test_mm256_div_ph() {
18352 let a = _mm256_set1_ph(1.0);
18353 let b = _mm256_set1_ph(2.0);
18354 let r = _mm256_div_ph(a, b);
18355 let e = _mm256_set1_ph(0.5);
18356 assert_eq_m256h(r, e);
18357 }
18358
18359 #[simd_test(enable = "avx512fp16,avx512vl")]
18360 unsafe fn test_mm256_mask_div_ph() {
18361 let a = _mm256_set1_ph(1.0);
18362 let b = _mm256_set1_ph(2.0);
18363 let src = _mm256_set_ph(
18364 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18365 19.0,
18366 );
18367 let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
18368 let e = _mm256_set_ph(
18369 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18370 );
18371 assert_eq_m256h(r, e);
18372 }
18373
18374 #[simd_test(enable = "avx512fp16,avx512vl")]
18375 unsafe fn test_mm256_maskz_div_ph() {
18376 let a = _mm256_set1_ph(1.0);
18377 let b = _mm256_set1_ph(2.0);
18378 let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
18379 let e = _mm256_set_ph(
18380 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18381 );
18382 assert_eq_m256h(r, e);
18383 }
18384
18385 #[simd_test(enable = "avx512fp16")]
18386 unsafe fn test_mm512_div_ph() {
18387 let a = _mm512_set1_ph(1.0);
18388 let b = _mm512_set1_ph(2.0);
18389 let r = _mm512_div_ph(a, b);
18390 let e = _mm512_set1_ph(0.5);
18391 assert_eq_m512h(r, e);
18392 }
18393
18394 #[simd_test(enable = "avx512fp16")]
18395 unsafe fn test_mm512_mask_div_ph() {
18396 let a = _mm512_set1_ph(1.0);
18397 let b = _mm512_set1_ph(2.0);
18398 let src = _mm512_set_ph(
18399 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18400 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
18401 33.0, 34.0, 35.0,
18402 );
18403 let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
18404 let e = _mm512_set_ph(
18405 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18406 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
18407 );
18408 assert_eq_m512h(r, e);
18409 }
18410
18411 #[simd_test(enable = "avx512fp16")]
18412 unsafe fn test_mm512_maskz_div_ph() {
18413 let a = _mm512_set1_ph(1.0);
18414 let b = _mm512_set1_ph(2.0);
18415 let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
18416 let e = _mm512_set_ph(
18417 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
18418 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18419 );
18420 assert_eq_m512h(r, e);
18421 }
18422
18423 #[simd_test(enable = "avx512fp16")]
18424 unsafe fn test_mm512_div_round_ph() {
18425 let a = _mm512_set1_ph(1.0);
18426 let b = _mm512_set1_ph(2.0);
18427 let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18428 let e = _mm512_set1_ph(0.5);
18429 assert_eq_m512h(r, e);
18430 }
18431
18432 #[simd_test(enable = "avx512fp16")]
18433 unsafe fn test_mm512_mask_div_round_ph() {
18434 let a = _mm512_set1_ph(1.0);
18435 let b = _mm512_set1_ph(2.0);
18436 let src = _mm512_set_ph(
18437 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18438 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
18439 33.0, 34.0, 35.0,
18440 );
18441 let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18442 src,
18443 0b01010101010101010101010101010101,
18444 a,
18445 b,
18446 );
18447 let e = _mm512_set_ph(
18448 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18449 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
18450 );
18451 assert_eq_m512h(r, e);
18452 }
18453
18454 #[simd_test(enable = "avx512fp16")]
18455 unsafe fn test_mm512_maskz_div_round_ph() {
18456 let a = _mm512_set1_ph(1.0);
18457 let b = _mm512_set1_ph(2.0);
18458 let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18459 0b01010101010101010101010101010101,
18460 a,
18461 b,
18462 );
18463 let e = _mm512_set_ph(
18464 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
18465 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18466 );
18467 assert_eq_m512h(r, e);
18468 }
18469
18470 #[simd_test(enable = "avx512fp16")]
18471 unsafe fn test_mm_div_round_sh() {
18472 let a = _mm_set_sh(1.0);
18473 let b = _mm_set_sh(2.0);
18474 let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18475 let e = _mm_set_sh(0.5);
18476 assert_eq_m128h(r, e);
18477 }
18478
18479 #[simd_test(enable = "avx512fp16")]
18480 unsafe fn test_mm_mask_div_round_sh() {
18481 let a = _mm_set_sh(1.0);
18482 let b = _mm_set_sh(2.0);
18483 let src = _mm_set_sh(4.0);
18484 let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18485 src, 0, a, b,
18486 );
18487 let e = _mm_set_sh(4.0);
18488 assert_eq_m128h(r, e);
18489 let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18490 src, 1, a, b,
18491 );
18492 let e = _mm_set_sh(0.5);
18493 assert_eq_m128h(r, e);
18494 }
18495
18496 #[simd_test(enable = "avx512fp16")]
18497 unsafe fn test_mm_maskz_div_round_sh() {
18498 let a = _mm_set_sh(1.0);
18499 let b = _mm_set_sh(2.0);
18500 let r =
18501 _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18502 let e = _mm_set_sh(0.0);
18503 assert_eq_m128h(r, e);
18504 let r =
18505 _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18506 let e = _mm_set_sh(0.5);
18507 assert_eq_m128h(r, e);
18508 }
18509
18510 #[simd_test(enable = "avx512fp16")]
18511 unsafe fn test_mm_div_sh() {
18512 let a = _mm_set_sh(1.0);
18513 let b = _mm_set_sh(2.0);
18514 let r = _mm_div_sh(a, b);
18515 let e = _mm_set_sh(0.5);
18516 assert_eq_m128h(r, e);
18517 }
18518
18519 #[simd_test(enable = "avx512fp16")]
18520 unsafe fn test_mm_mask_div_sh() {
18521 let a = _mm_set_sh(1.0);
18522 let b = _mm_set_sh(2.0);
18523 let src = _mm_set_sh(4.0);
18524 let r = _mm_mask_div_sh(src, 0, a, b);
18525 let e = _mm_set_sh(4.0);
18526 assert_eq_m128h(r, e);
18527 let r = _mm_mask_div_sh(src, 1, a, b);
18528 let e = _mm_set_sh(0.5);
18529 assert_eq_m128h(r, e);
18530 }
18531
18532 #[simd_test(enable = "avx512fp16")]
18533 unsafe fn test_mm_maskz_div_sh() {
18534 let a = _mm_set_sh(1.0);
18535 let b = _mm_set_sh(2.0);
18536 let r = _mm_maskz_div_sh(0, a, b);
18537 let e = _mm_set_sh(0.0);
18538 assert_eq_m128h(r, e);
18539 let r = _mm_maskz_div_sh(1, a, b);
18540 let e = _mm_set_sh(0.5);
18541 assert_eq_m128h(r, e);
18542 }
18543
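// Packed complex (pch) tests: each pair of adjacent f16 lanes holds one complex value
// as (real, imaginary). Both inputs are 0 + 1i everywhere, and (0 + 1i) * (0 + 1i) =
// -1 + 0i, so every product pair is (-1.0, 0.0). In the masked forms each mask bit
// controls one complex pair (two lanes).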
18544 #[simd_test(enable = "avx512fp16,avx512vl")]
18545 unsafe fn test_mm_mul_pch() {
18546 let a = _mm_set1_pch(0.0, 1.0);
18547 let b = _mm_set1_pch(0.0, 1.0);
18548 let r = _mm_mul_pch(a, b);
18549 let e = _mm_set1_pch(-1.0, 0.0);
18550 assert_eq_m128h(r, e);
18551 }
18552
18553 #[simd_test(enable = "avx512fp16,avx512vl")]
18554 unsafe fn test_mm_mask_mul_pch() {
18555 let a = _mm_set1_pch(0.0, 1.0);
18556 let b = _mm_set1_pch(0.0, 1.0);
18557 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18558 let r = _mm_mask_mul_pch(src, 0b0101, a, b);
18559 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18560 assert_eq_m128h(r, e);
18561 }
18562
18563 #[simd_test(enable = "avx512fp16,avx512vl")]
18564 unsafe fn test_mm_maskz_mul_pch() {
18565 let a = _mm_set1_pch(0.0, 1.0);
18566 let b = _mm_set1_pch(0.0, 1.0);
18567 let r = _mm_maskz_mul_pch(0b0101, a, b);
18568 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18569 assert_eq_m128h(r, e);
18570 }
18571
18572 #[simd_test(enable = "avx512fp16,avx512vl")]
18573 unsafe fn test_mm256_mul_pch() {
18574 let a = _mm256_set1_pch(0.0, 1.0);
18575 let b = _mm256_set1_pch(0.0, 1.0);
18576 let r = _mm256_mul_pch(a, b);
18577 let e = _mm256_set1_pch(-1.0, 0.0);
18578 assert_eq_m256h(r, e);
18579 }
18580
18581 #[simd_test(enable = "avx512fp16,avx512vl")]
18582 unsafe fn test_mm256_mask_mul_pch() {
18583 let a = _mm256_set1_pch(0.0, 1.0);
18584 let b = _mm256_set1_pch(0.0, 1.0);
18585 let src = _mm256_setr_ph(
18586 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18587 );
18588 let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
18589 let e = _mm256_setr_ph(
18590 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18591 );
18592 assert_eq_m256h(r, e);
18593 }
18594
18595 #[simd_test(enable = "avx512fp16,avx512vl")]
18596 unsafe fn test_mm256_maskz_mul_pch() {
18597 let a = _mm256_set1_pch(0.0, 1.0);
18598 let b = _mm256_set1_pch(0.0, 1.0);
18599 let r = _mm256_maskz_mul_pch(0b01010101, a, b);
18600 let e = _mm256_setr_ph(
18601 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18602 );
18603 assert_eq_m256h(r, e);
18604 }
18605
18606 #[simd_test(enable = "avx512fp16")]
18607 unsafe fn test_mm512_mul_pch() {
18608 let a = _mm512_set1_pch(0.0, 1.0);
18609 let b = _mm512_set1_pch(0.0, 1.0);
18610 let r = _mm512_mul_pch(a, b);
18611 let e = _mm512_set1_pch(-1.0, 0.0);
18612 assert_eq_m512h(r, e);
18613 }
18614
18615 #[simd_test(enable = "avx512fp16")]
18616 unsafe fn test_mm512_mask_mul_pch() {
18617 let a = _mm512_set1_pch(0.0, 1.0);
18618 let b = _mm512_set1_pch(0.0, 1.0);
18619 let src = _mm512_setr_ph(
18620 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18621 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18622 32.0, 33.0,
18623 );
18624 let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
18625 let e = _mm512_setr_ph(
18626 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18627 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18628 33.0,
18629 );
18630 assert_eq_m512h(r, e);
18631 }
18632
18633 #[simd_test(enable = "avx512fp16")]
18634 unsafe fn test_mm512_maskz_mul_pch() {
18635 let a = _mm512_set1_pch(0.0, 1.0);
18636 let b = _mm512_set1_pch(0.0, 1.0);
18637 let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
18638 let e = _mm512_setr_ph(
18639 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18640 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18641 );
18642 assert_eq_m512h(r, e);
18643 }
18644
18645 #[simd_test(enable = "avx512fp16")]
18646 unsafe fn test_mm512_mul_round_pch() {
18647 let a = _mm512_set1_pch(0.0, 1.0);
18648 let b = _mm512_set1_pch(0.0, 1.0);
18649 let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18650 let e = _mm512_set1_pch(-1.0, 0.0);
18651 assert_eq_m512h(r, e);
18652 }
18653
18654 #[simd_test(enable = "avx512fp16")]
18655 unsafe fn test_mm512_mask_mul_round_pch() {
18656 let a = _mm512_set1_pch(0.0, 1.0);
18657 let b = _mm512_set1_pch(0.0, 1.0);
18658 let src = _mm512_setr_ph(
18659 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18660 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18661 32.0, 33.0,
18662 );
18663 let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18664 src,
18665 0b0101010101010101,
18666 a,
18667 b,
18668 );
18669 let e = _mm512_setr_ph(
18670 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18671 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18672 33.0,
18673 );
18674 assert_eq_m512h(r, e);
18675 }
18676
18677 #[simd_test(enable = "avx512fp16")]
18678 unsafe fn test_mm512_maskz_mul_round_pch() {
18679 let a = _mm512_set1_pch(0.0, 1.0);
18680 let b = _mm512_set1_pch(0.0, 1.0);
18681 let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18682 0b0101010101010101,
18683 a,
18684 b,
18685 );
18686 let e = _mm512_setr_ph(
18687 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18688 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18689 );
18690 assert_eq_m512h(r, e);
18691 }
18692
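// Scalar complex (sch) multiplies use only the low (real, imaginary) pair; the upper
// six lanes of the result are copied from a, and the masked forms replace just the low
// pair with src or zero when the mask bit is clear.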
18693 #[simd_test(enable = "avx512fp16")]
18694 unsafe fn test_mm_mul_round_sch() {
18695 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18696 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18697 let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18698 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18699 assert_eq_m128h(r, e);
18700 }
18701
18702 #[simd_test(enable = "avx512fp16")]
18703 unsafe fn test_mm_mask_mul_round_sch() {
18704 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18705 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18706 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18707 let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18708 src, 0, a, b,
18709 );
18710 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18711 assert_eq_m128h(r, e);
18712 }
18713
18714 #[simd_test(enable = "avx512fp16")]
18715 unsafe fn test_mm_maskz_mul_round_sch() {
18716 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18717 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18718 let r =
18719 _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18720 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18721 assert_eq_m128h(r, e);
18722 }
18723
18724 #[simd_test(enable = "avx512fp16")]
18725 unsafe fn test_mm_mul_sch() {
18726 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18727 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18728 let r = _mm_mul_sch(a, b);
18729 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18730 assert_eq_m128h(r, e);
18731 }
18732
18733 #[simd_test(enable = "avx512fp16")]
18734 unsafe fn test_mm_mask_mul_sch() {
18735 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18736 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18737 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18738 let r = _mm_mask_mul_sch(src, 0, a, b);
18739 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18740 assert_eq_m128h(r, e);
18741 }
18742
18743 #[simd_test(enable = "avx512fp16")]
18744 unsafe fn test_mm_maskz_mul_sch() {
18745 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18746 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18747 let r = _mm_maskz_mul_sch(0, a, b);
18748 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18749 assert_eq_m128h(r, e);
18750 }
18751
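// The fmul_pch family performs the same complex multiply as mul_pch (Intel exposes both
// names for the operation), so these tests repeat the mul_pch expectations.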
18752 #[simd_test(enable = "avx512fp16,avx512vl")]
18753 unsafe fn test_mm_fmul_pch() {
18754 let a = _mm_set1_pch(0.0, 1.0);
18755 let b = _mm_set1_pch(0.0, 1.0);
18756 let r = _mm_fmul_pch(a, b);
18757 let e = _mm_set1_pch(-1.0, 0.0);
18758 assert_eq_m128h(r, e);
18759 }
18760
18761 #[simd_test(enable = "avx512fp16,avx512vl")]
18762 unsafe fn test_mm_mask_fmul_pch() {
18763 let a = _mm_set1_pch(0.0, 1.0);
18764 let b = _mm_set1_pch(0.0, 1.0);
18765 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18766 let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
18767 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18768 assert_eq_m128h(r, e);
18769 }
18770
18771 #[simd_test(enable = "avx512fp16,avx512vl")]
18772 unsafe fn test_mm_maskz_fmul_pch() {
18773 let a = _mm_set1_pch(0.0, 1.0);
18774 let b = _mm_set1_pch(0.0, 1.0);
18775 let r = _mm_maskz_fmul_pch(0b0101, a, b);
18776 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18777 assert_eq_m128h(r, e);
18778 }
18779
18780 #[simd_test(enable = "avx512fp16,avx512vl")]
18781 unsafe fn test_mm256_fmul_pch() {
18782 let a = _mm256_set1_pch(0.0, 1.0);
18783 let b = _mm256_set1_pch(0.0, 1.0);
18784 let r = _mm256_fmul_pch(a, b);
18785 let e = _mm256_set1_pch(-1.0, 0.0);
18786 assert_eq_m256h(r, e);
18787 }
18788
18789 #[simd_test(enable = "avx512fp16,avx512vl")]
18790 unsafe fn test_mm256_mask_fmul_pch() {
18791 let a = _mm256_set1_pch(0.0, 1.0);
18792 let b = _mm256_set1_pch(0.0, 1.0);
18793 let src = _mm256_setr_ph(
18794 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18795 );
18796 let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
18797 let e = _mm256_setr_ph(
18798 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18799 );
18800 assert_eq_m256h(r, e);
18801 }
18802
18803 #[simd_test(enable = "avx512fp16,avx512vl")]
18804 unsafe fn test_mm256_maskz_fmul_pch() {
18805 let a = _mm256_set1_pch(0.0, 1.0);
18806 let b = _mm256_set1_pch(0.0, 1.0);
18807 let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
18808 let e = _mm256_setr_ph(
18809 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18810 );
18811 assert_eq_m256h(r, e);
18812 }
18813
18814 #[simd_test(enable = "avx512fp16")]
18815 unsafe fn test_mm512_fmul_pch() {
18816 let a = _mm512_set1_pch(0.0, 1.0);
18817 let b = _mm512_set1_pch(0.0, 1.0);
18818 let r = _mm512_fmul_pch(a, b);
18819 let e = _mm512_set1_pch(-1.0, 0.0);
18820 assert_eq_m512h(r, e);
18821 }
18822
18823 #[simd_test(enable = "avx512fp16")]
18824 unsafe fn test_mm512_mask_fmul_pch() {
18825 let a = _mm512_set1_pch(0.0, 1.0);
18826 let b = _mm512_set1_pch(0.0, 1.0);
18827 let src = _mm512_setr_ph(
18828 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18829 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18830 32.0, 33.0,
18831 );
18832 let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
18833 let e = _mm512_setr_ph(
18834 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18835 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18836 33.0,
18837 );
18838 assert_eq_m512h(r, e);
18839 }
18840
18841 #[simd_test(enable = "avx512fp16")]
18842 unsafe fn test_mm512_maskz_fmul_pch() {
18843 let a = _mm512_set1_pch(0.0, 1.0);
18844 let b = _mm512_set1_pch(0.0, 1.0);
18845 let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
18846 let e = _mm512_setr_ph(
18847 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18848 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18849 );
18850 assert_eq_m512h(r, e);
18851 }
18852
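    // The `_round_` variants take the rounding/SAE control as a const generic;
    // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` requests round-to-nearest-even
    // with floating-point exceptions suppressed, so the results match the non-round tests.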
18853 #[simd_test(enable = "avx512fp16")]
18854 unsafe fn test_mm512_fmul_round_pch() {
18855 let a = _mm512_set1_pch(0.0, 1.0);
18856 let b = _mm512_set1_pch(0.0, 1.0);
18857 let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18858 let e = _mm512_set1_pch(-1.0, 0.0);
18859 assert_eq_m512h(r, e);
18860 }
18861
18862 #[simd_test(enable = "avx512fp16")]
18863 unsafe fn test_mm512_mask_fmul_round_pch() {
18864 let a = _mm512_set1_pch(0.0, 1.0);
18865 let b = _mm512_set1_pch(0.0, 1.0);
18866 let src = _mm512_setr_ph(
18867 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18868 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18869 32.0, 33.0,
18870 );
18871 let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18872 src,
18873 0b0101010101010101,
18874 a,
18875 b,
18876 );
18877 let e = _mm512_setr_ph(
18878 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18879 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18880 33.0,
18881 );
18882 assert_eq_m512h(r, e);
18883 }
18884
18885 #[simd_test(enable = "avx512fp16")]
18886 unsafe fn test_mm512_maskz_fmul_round_pch() {
18887 let a = _mm512_set1_pch(0.0, 1.0);
18888 let b = _mm512_set1_pch(0.0, 1.0);
18889 let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18890 0b0101010101010101,
18891 a,
18892 b,
18893 );
18894 let e = _mm512_setr_ph(
18895 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18896 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18897 );
18898 assert_eq_m512h(r, e);
18899 }
18900
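    // Scalar complex (`_sch`) variants only compute the lowest complex pair; the
    // remaining lanes are copied from `a`. With the mask bit clear, the low pair comes
    // from `src` (mask) or is zeroed (maskz).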
18901 #[simd_test(enable = "avx512fp16")]
18902 unsafe fn test_mm_fmul_round_sch() {
18903 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18904 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18905 let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18906 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18907 assert_eq_m128h(r, e);
18908 }
18909
18910 #[simd_test(enable = "avx512fp16")]
18911 unsafe fn test_mm_mask_fmul_round_sch() {
18912 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18913 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18914 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18915 let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18916 src, 0, a, b,
18917 );
18918 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18919 assert_eq_m128h(r, e);
18920 }
18921
18922 #[simd_test(enable = "avx512fp16")]
18923 unsafe fn test_mm_maskz_fmul_round_sch() {
18924 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18925 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18926 let r =
18927 _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18928 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18929 assert_eq_m128h(r, e);
18930 }
18931
18932 #[simd_test(enable = "avx512fp16")]
18933 unsafe fn test_mm_fmul_sch() {
18934 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18935 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18936 let r = _mm_fmul_sch(a, b);
18937 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18938 assert_eq_m128h(r, e);
18939 }
18940
18941 #[simd_test(enable = "avx512fp16")]
18942 unsafe fn test_mm_mask_fmul_sch() {
18943 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18944 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18945 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18946 let r = _mm_mask_fmul_sch(src, 0, a, b);
18947 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18948 assert_eq_m128h(r, e);
18949 }
18950
18951 #[simd_test(enable = "avx512fp16")]
18952 unsafe fn test_mm_maskz_fmul_sch() {
18953 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18954 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18955 let r = _mm_maskz_fmul_sch(0, a, b);
18956 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18957 assert_eq_m128h(r, e);
18958 }
18959
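    // `_cmul_pch` multiplies `a` by the complex conjugate of `b` (per Intel's docs):
    // (0 + 1i) * conj(0 - 1i) = (0 + 1i) * (0 + 1i) = -1 + 0i, hence the (-1.0, 0.0) pairs.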
18960 #[simd_test(enable = "avx512fp16,avx512vl")]
18961 unsafe fn test_mm_cmul_pch() {
18962 let a = _mm_set1_pch(0.0, 1.0);
18963 let b = _mm_set1_pch(0.0, -1.0);
18964 let r = _mm_cmul_pch(a, b);
18965 let e = _mm_set1_pch(-1.0, 0.0);
18966 assert_eq_m128h(r, e);
18967 }
18968
18969 #[simd_test(enable = "avx512fp16,avx512vl")]
18970 unsafe fn test_mm_mask_cmul_pch() {
18971 let a = _mm_set1_pch(0.0, 1.0);
18972 let b = _mm_set1_pch(0.0, -1.0);
18973 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18974 let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
18975 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18976 assert_eq_m128h(r, e);
18977 }
18978
18979 #[simd_test(enable = "avx512fp16,avx512vl")]
18980 unsafe fn test_mm_maskz_cmul_pch() {
18981 let a = _mm_set1_pch(0.0, 1.0);
18982 let b = _mm_set1_pch(0.0, -1.0);
18983 let r = _mm_maskz_cmul_pch(0b0101, a, b);
18984 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18985 assert_eq_m128h(r, e);
18986 }
18987
18988 #[simd_test(enable = "avx512fp16,avx512vl")]
18989 unsafe fn test_mm256_cmul_pch() {
18990 let a = _mm256_set1_pch(0.0, 1.0);
18991 let b = _mm256_set1_pch(0.0, -1.0);
18992 let r = _mm256_cmul_pch(a, b);
18993 let e = _mm256_set1_pch(-1.0, 0.0);
18994 assert_eq_m256h(r, e);
18995 }
18996
18997 #[simd_test(enable = "avx512fp16,avx512vl")]
18998 unsafe fn test_mm256_mask_cmul_pch() {
18999 let a = _mm256_set1_pch(0.0, 1.0);
19000 let b = _mm256_set1_pch(0.0, -1.0);
19001 let src = _mm256_setr_ph(
19002 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19003 );
19004 let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
19005 let e = _mm256_setr_ph(
19006 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19007 );
19008 assert_eq_m256h(r, e);
19009 }
19010
19011 #[simd_test(enable = "avx512fp16,avx512vl")]
19012 unsafe fn test_mm256_maskz_cmul_pch() {
19013 let a = _mm256_set1_pch(0.0, 1.0);
19014 let b = _mm256_set1_pch(0.0, -1.0);
19015 let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
19016 let e = _mm256_setr_ph(
19017 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19018 );
19019 assert_eq_m256h(r, e);
19020 }
19021
19022 #[simd_test(enable = "avx512fp16")]
19023 unsafe fn test_mm512_cmul_pch() {
19024 let a = _mm512_set1_pch(0.0, 1.0);
19025 let b = _mm512_set1_pch(0.0, -1.0);
19026 let r = _mm512_cmul_pch(a, b);
19027 let e = _mm512_set1_pch(-1.0, 0.0);
19028 assert_eq_m512h(r, e);
19029 }
19030
19031 #[simd_test(enable = "avx512fp16")]
19032 unsafe fn test_mm512_mask_cmul_pch() {
19033 let a = _mm512_set1_pch(0.0, 1.0);
19034 let b = _mm512_set1_pch(0.0, -1.0);
19035 let src = _mm512_setr_ph(
19036 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19037 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19038 32.0, 33.0,
19039 );
19040 let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
19041 let e = _mm512_setr_ph(
19042 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19043 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19044 33.0,
19045 );
19046 assert_eq_m512h(r, e);
19047 }
19048
19049 #[simd_test(enable = "avx512fp16")]
19050 unsafe fn test_mm512_maskz_cmul_pch() {
19051 let a = _mm512_set1_pch(0.0, 1.0);
19052 let b = _mm512_set1_pch(0.0, -1.0);
19053 let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
19054 let e = _mm512_setr_ph(
19055 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19056 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19057 );
19058 assert_eq_m512h(r, e);
19059 }
19060
19061 #[simd_test(enable = "avx512fp16")]
19062 unsafe fn test_mm512_cmul_round_pch() {
19063 let a = _mm512_set1_pch(0.0, 1.0);
19064 let b = _mm512_set1_pch(0.0, -1.0);
19065 let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19066 let e = _mm512_set1_pch(-1.0, 0.0);
19067 assert_eq_m512h(r, e);
19068 }
19069
19070 #[simd_test(enable = "avx512fp16")]
19071 unsafe fn test_mm512_mask_cmul_round_pch() {
19072 let a = _mm512_set1_pch(0.0, 1.0);
19073 let b = _mm512_set1_pch(0.0, -1.0);
19074 let src = _mm512_setr_ph(
19075 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19076 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19077 32.0, 33.0,
19078 );
19079 let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19080 src,
19081 0b0101010101010101,
19082 a,
19083 b,
19084 );
19085 let e = _mm512_setr_ph(
19086 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19087 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19088 33.0,
19089 );
19090 assert_eq_m512h(r, e);
19091 }
19092
19093 #[simd_test(enable = "avx512fp16")]
19094 unsafe fn test_mm512_maskz_cmul_round_pch() {
19095 let a = _mm512_set1_pch(0.0, 1.0);
19096 let b = _mm512_set1_pch(0.0, -1.0);
19097 let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19098 0b0101010101010101,
19099 a,
19100 b,
19101 );
19102 let e = _mm512_setr_ph(
19103 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19104 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19105 );
19106 assert_eq_m512h(r, e);
19107 }
19108
19109 #[simd_test(enable = "avx512fp16")]
19110 unsafe fn test_mm_cmul_sch() {
19111 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19112 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19113 let r = _mm_cmul_sch(a, b);
19114 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19115 assert_eq_m128h(r, e);
19116 }
19117
19118 #[simd_test(enable = "avx512fp16")]
19119 unsafe fn test_mm_mask_cmul_sch() {
19120 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19121 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19122 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19123 let r = _mm_mask_cmul_sch(src, 0, a, b);
19124 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19125 assert_eq_m128h(r, e);
19126 }
19127
19128 #[simd_test(enable = "avx512fp16")]
19129 unsafe fn test_mm_maskz_cmul_sch() {
19130 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19131 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19132 let r = _mm_maskz_cmul_sch(0, a, b);
19133 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19134 assert_eq_m128h(r, e);
19135 }
19136
19137 #[simd_test(enable = "avx512fp16")]
19138 unsafe fn test_mm_cmul_round_sch() {
19139 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19140 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19141 let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19142 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19143 assert_eq_m128h(r, e);
19144 }
19145
19146 #[simd_test(enable = "avx512fp16")]
19147 unsafe fn test_mm_mask_cmul_round_sch() {
19148 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19149 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19150 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19151 let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19152 src, 0, a, b,
19153 );
19154 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19155 assert_eq_m128h(r, e);
19156 }
19157
19158 #[simd_test(enable = "avx512fp16")]
19159 unsafe fn test_mm_maskz_cmul_round_sch() {
19160 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19161 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19162 let r =
19163 _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19164 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19165 assert_eq_m128h(r, e);
19166 }
19167
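    // `_fcmul_pch` is the alternate name for the same conjugate multiplication tested
    // above as `_cmul_pch` (both appear to map to VFCMULCPH), so the inputs and
    // expected values repeat.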
19168 #[simd_test(enable = "avx512fp16,avx512vl")]
19169 unsafe fn test_mm_fcmul_pch() {
19170 let a = _mm_set1_pch(0.0, 1.0);
19171 let b = _mm_set1_pch(0.0, -1.0);
19172 let r = _mm_fcmul_pch(a, b);
19173 let e = _mm_set1_pch(-1.0, 0.0);
19174 assert_eq_m128h(r, e);
19175 }
19176
19177 #[simd_test(enable = "avx512fp16,avx512vl")]
19178 unsafe fn test_mm_mask_fcmul_pch() {
19179 let a = _mm_set1_pch(0.0, 1.0);
19180 let b = _mm_set1_pch(0.0, -1.0);
19181 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19182 let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
19183 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
19184 assert_eq_m128h(r, e);
19185 }
19186
19187 #[simd_test(enable = "avx512fp16,avx512vl")]
19188 unsafe fn test_mm_maskz_fcmul_pch() {
19189 let a = _mm_set1_pch(0.0, 1.0);
19190 let b = _mm_set1_pch(0.0, -1.0);
19191 let r = _mm_maskz_fcmul_pch(0b0101, a, b);
19192 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
19193 assert_eq_m128h(r, e);
19194 }
19195
19196 #[simd_test(enable = "avx512fp16,avx512vl")]
19197 unsafe fn test_mm256_fcmul_pch() {
19198 let a = _mm256_set1_pch(0.0, 1.0);
19199 let b = _mm256_set1_pch(0.0, -1.0);
19200 let r = _mm256_fcmul_pch(a, b);
19201 let e = _mm256_set1_pch(-1.0, 0.0);
19202 assert_eq_m256h(r, e);
19203 }
19204
19205 #[simd_test(enable = "avx512fp16,avx512vl")]
19206 unsafe fn test_mm256_mask_fcmul_pch() {
19207 let a = _mm256_set1_pch(0.0, 1.0);
19208 let b = _mm256_set1_pch(0.0, -1.0);
19209 let src = _mm256_setr_ph(
19210 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19211 );
19212 let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
19213 let e = _mm256_setr_ph(
19214 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19215 );
19216 assert_eq_m256h(r, e);
19217 }
19218
19219 #[simd_test(enable = "avx512fp16,avx512vl")]
19220 unsafe fn test_mm256_maskz_fcmul_pch() {
19221 let a = _mm256_set1_pch(0.0, 1.0);
19222 let b = _mm256_set1_pch(0.0, -1.0);
19223 let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
19224 let e = _mm256_setr_ph(
19225 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19226 );
19227 assert_eq_m256h(r, e);
19228 }
19229
19230 #[simd_test(enable = "avx512fp16")]
19231 unsafe fn test_mm512_fcmul_pch() {
19232 let a = _mm512_set1_pch(0.0, 1.0);
19233 let b = _mm512_set1_pch(0.0, -1.0);
19234 let r = _mm512_fcmul_pch(a, b);
19235 let e = _mm512_set1_pch(-1.0, 0.0);
19236 assert_eq_m512h(r, e);
19237 }
19238
19239 #[simd_test(enable = "avx512fp16")]
19240 unsafe fn test_mm512_mask_fcmul_pch() {
19241 let a = _mm512_set1_pch(0.0, 1.0);
19242 let b = _mm512_set1_pch(0.0, -1.0);
19243 let src = _mm512_setr_ph(
19244 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19245 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19246 32.0, 33.0,
19247 );
19248 let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
19249 let e = _mm512_setr_ph(
19250 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19251 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19252 33.0,
19253 );
19254 assert_eq_m512h(r, e);
19255 }
19256
19257 #[simd_test(enable = "avx512fp16")]
19258 unsafe fn test_mm512_maskz_fcmul_pch() {
19259 let a = _mm512_set1_pch(0.0, 1.0);
19260 let b = _mm512_set1_pch(0.0, -1.0);
19261 let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
19262 let e = _mm512_setr_ph(
19263 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19264 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19265 );
19266 assert_eq_m512h(r, e);
19267 }
19268
19269 #[simd_test(enable = "avx512fp16")]
19270 unsafe fn test_mm512_fcmul_round_pch() {
19271 let a = _mm512_set1_pch(0.0, 1.0);
19272 let b = _mm512_set1_pch(0.0, -1.0);
19273 let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19274 let e = _mm512_set1_pch(-1.0, 0.0);
19275 assert_eq_m512h(r, e);
19276 }
19277
19278 #[simd_test(enable = "avx512fp16")]
19279 unsafe fn test_mm512_mask_fcmul_round_pch() {
19280 let a = _mm512_set1_pch(0.0, 1.0);
19281 let b = _mm512_set1_pch(0.0, -1.0);
19282 let src = _mm512_setr_ph(
19283 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19284 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19285 32.0, 33.0,
19286 );
19287 let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19288 src,
19289 0b0101010101010101,
19290 a,
19291 b,
19292 );
19293 let e = _mm512_setr_ph(
19294 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19295 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19296 33.0,
19297 );
19298 assert_eq_m512h(r, e);
19299 }
19300
19301 #[simd_test(enable = "avx512fp16")]
19302 unsafe fn test_mm512_maskz_fcmul_round_pch() {
19303 let a = _mm512_set1_pch(0.0, 1.0);
19304 let b = _mm512_set1_pch(0.0, -1.0);
19305 let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19306 0b0101010101010101,
19307 a,
19308 b,
19309 );
19310 let e = _mm512_setr_ph(
19311 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19312 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19313 );
19314 assert_eq_m512h(r, e);
19315 }
19316
19317 #[simd_test(enable = "avx512fp16")]
19318 unsafe fn test_mm_fcmul_sch() {
19319 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19320 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19321 let r = _mm_fcmul_sch(a, b);
19322 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19323 assert_eq_m128h(r, e);
19324 }
19325
19326 #[simd_test(enable = "avx512fp16")]
19327 unsafe fn test_mm_mask_fcmul_sch() {
19328 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19329 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19330 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19331 let r = _mm_mask_fcmul_sch(src, 0, a, b);
19332 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19333 assert_eq_m128h(r, e);
19334 }
19335
19336 #[simd_test(enable = "avx512fp16")]
19337 unsafe fn test_mm_maskz_fcmul_sch() {
19338 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19339 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19340 let r = _mm_maskz_fcmul_sch(0, a, b);
19341 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19342 assert_eq_m128h(r, e);
19343 }
19344
19345 #[simd_test(enable = "avx512fp16")]
19346 unsafe fn test_mm_fcmul_round_sch() {
19347 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19348 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19349 let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19350 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19351 assert_eq_m128h(r, e);
19352 }
19353
19354 #[simd_test(enable = "avx512fp16")]
19355 unsafe fn test_mm_mask_fcmul_round_sch() {
19356 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19357 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19358 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19359 let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19360 src, 0, a, b,
19361 );
19362 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19363 assert_eq_m128h(r, e);
19364 }
19365
19366 #[simd_test(enable = "avx512fp16")]
19367 unsafe fn test_mm_maskz_fcmul_round_sch() {
19368 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19369 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19370 let r =
19371 _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19372 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19373 assert_eq_m128h(r, e);
19374 }
19375
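    // `_abs_ph` clears the sign bit of every f16 lane, so each negative input maps to
    // its positive counterpart and 0.0 stays 0.0.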
19376 #[simd_test(enable = "avx512fp16,avx512vl")]
19377 unsafe fn test_mm_abs_ph() {
19378 let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
19379 let r = _mm_abs_ph(a);
19380 let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
19381 assert_eq_m128h(r, e);
19382 }
19383
19384 #[simd_test(enable = "avx512fp16,avx512vl")]
19385 unsafe fn test_mm256_abs_ph() {
19386 let a = _mm256_set_ph(
19387 -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
19388 -14.0,
19389 );
19390 let r = _mm256_abs_ph(a);
19391 let e = _mm256_set_ph(
19392 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
19393 );
19394 assert_eq_m256h(r, e);
19395 }
19396
19397 #[simd_test(enable = "avx512fp16")]
19398 unsafe fn test_mm512_abs_ph() {
19399 let a = _mm512_set_ph(
19400 -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
19401 -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
19402 27.0, -28.0, 29.0, -30.0,
19403 );
19404 let r = _mm512_abs_ph(a);
19405 let e = _mm512_set_ph(
19406 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
19407 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
19408 29.0, 30.0,
19409 );
19410 assert_eq_m512h(r, e);
19411 }
19412
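    // `_conj_pch` negates the imaginary lane of each complex pair (flips its sign bit)
    // and leaves the real lane untouched, turning 0 + 1i into 0 - 1i.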
19413 #[simd_test(enable = "avx512fp16,avx512vl")]
19414 unsafe fn test_mm_conj_pch() {
19415 let a = _mm_set1_pch(0.0, 1.0);
19416 let r = _mm_conj_pch(a);
19417 let e = _mm_set1_pch(0.0, -1.0);
19418 assert_eq_m128h(r, e);
19419 }
19420
19421 #[simd_test(enable = "avx512fp16,avx512vl")]
19422 unsafe fn test_mm_mask_conj_pch() {
19423 let a = _mm_set1_pch(0.0, 1.0);
19424 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19425 let r = _mm_mask_conj_pch(src, 0b0101, a);
19426 let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
19427 assert_eq_m128h(r, e);
19428 }
19429
19430 #[simd_test(enable = "avx512fp16,avx512vl")]
19431 unsafe fn test_mm_maskz_conj_pch() {
19432 let a = _mm_set1_pch(0.0, 1.0);
19433 let r = _mm_maskz_conj_pch(0b0101, a);
19434 let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
19435 assert_eq_m128h(r, e);
19436 }
19437
19438 #[simd_test(enable = "avx512fp16,avx512vl")]
19439 unsafe fn test_mm256_conj_pch() {
19440 let a = _mm256_set1_pch(0.0, 1.0);
19441 let r = _mm256_conj_pch(a);
19442 let e = _mm256_set1_pch(0.0, -1.0);
19443 assert_eq_m256h(r, e);
19444 }
19445
19446 #[simd_test(enable = "avx512fp16,avx512vl")]
19447 unsafe fn test_mm256_mask_conj_pch() {
19448 let a = _mm256_set1_pch(0.0, 1.0);
19449 let src = _mm256_setr_ph(
19450 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19451 );
19452 let r = _mm256_mask_conj_pch(src, 0b01010101, a);
19453 let e = _mm256_setr_ph(
19454 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
19455 );
19456 assert_eq_m256h(r, e);
19457 }
19458
19459 #[simd_test(enable = "avx512fp16,avx512vl")]
19460 unsafe fn test_mm256_maskz_conj_pch() {
19461 let a = _mm256_set1_pch(0.0, 1.0);
19462 let r = _mm256_maskz_conj_pch(0b01010101, a);
19463 let e = _mm256_setr_ph(
19464 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19465 );
19466 assert_eq_m256h(r, e);
19467 }
19468
19469 #[simd_test(enable = "avx512fp16")]
19470 unsafe fn test_mm512_conj_pch() {
19471 let a = _mm512_set1_pch(0.0, 1.0);
19472 let r = _mm512_conj_pch(a);
19473 let e = _mm512_set1_pch(0.0, -1.0);
19474 assert_eq_m512h(r, e);
19475 }
19476
19477 #[simd_test(enable = "avx512fp16")]
19478 unsafe fn test_mm512_mask_conj_pch() {
19479 let a = _mm512_set1_pch(0.0, 1.0);
19480 let src = _mm512_setr_ph(
19481 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19482 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19483 32.0, 33.0,
19484 );
19485 let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
19486 let e = _mm512_setr_ph(
19487 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
19488 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
19489 33.0,
19490 );
19491 assert_eq_m512h(r, e);
19492 }
19493
19494 #[simd_test(enable = "avx512fp16")]
19495 unsafe fn test_mm512_maskz_conj_pch() {
19496 let a = _mm512_set1_pch(0.0, 1.0);
19497 let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
19498 let e = _mm512_setr_ph(
19499 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19500 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19501 );
19502 assert_eq_m512h(r, e);
19503 }
19504
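    // Complex fused multiply-add: r = a * b + c per complex pair. With a = 0 + 1i,
    // b = 0 + 2i, c = 0 + 3i the result is -2 + 3i. For masked-off pairs, `mask` keeps
    // the pair from `a`, `mask3` keeps it from `c`, and `maskz` zeroes it.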
19505 #[simd_test(enable = "avx512fp16,avx512vl")]
19506 unsafe fn test_mm_fmadd_pch() {
19507 let a = _mm_set1_pch(0.0, 1.0);
19508 let b = _mm_set1_pch(0.0, 2.0);
19509 let c = _mm_set1_pch(0.0, 3.0);
19510 let r = _mm_fmadd_pch(a, b, c);
19511 let e = _mm_set1_pch(-2.0, 3.0);
19512 assert_eq_m128h(r, e);
19513 }
19514
19515 #[simd_test(enable = "avx512fp16,avx512vl")]
19516 unsafe fn test_mm_mask_fmadd_pch() {
19517 let a = _mm_set1_pch(0.0, 1.0);
19518 let b = _mm_set1_pch(0.0, 2.0);
19519 let c = _mm_set1_pch(0.0, 3.0);
19520 let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
19521 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
19522 assert_eq_m128h(r, e);
19523 }
19524
19525 #[simd_test(enable = "avx512fp16,avx512vl")]
19526 unsafe fn test_mm_mask3_fmadd_pch() {
19527 let a = _mm_set1_pch(0.0, 1.0);
19528 let b = _mm_set1_pch(0.0, 2.0);
19529 let c = _mm_set1_pch(0.0, 3.0);
19530 let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
19531 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
19532 assert_eq_m128h(r, e);
19533 }
19534
19535 #[simd_test(enable = "avx512fp16,avx512vl")]
19536 unsafe fn test_mm_maskz_fmadd_pch() {
19537 let a = _mm_set1_pch(0.0, 1.0);
19538 let b = _mm_set1_pch(0.0, 2.0);
19539 let c = _mm_set1_pch(0.0, 3.0);
19540 let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
19541 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
19542 assert_eq_m128h(r, e);
19543 }
19544
19545 #[simd_test(enable = "avx512fp16,avx512vl")]
19546 unsafe fn test_mm256_fmadd_pch() {
19547 let a = _mm256_set1_pch(0.0, 1.0);
19548 let b = _mm256_set1_pch(0.0, 2.0);
19549 let c = _mm256_set1_pch(0.0, 3.0);
19550 let r = _mm256_fmadd_pch(a, b, c);
19551 let e = _mm256_set1_pch(-2.0, 3.0);
19552 assert_eq_m256h(r, e);
19553 }
19554
19555 #[simd_test(enable = "avx512fp16,avx512vl")]
19556 unsafe fn test_mm256_mask_fmadd_pch() {
19557 let a = _mm256_set1_pch(0.0, 1.0);
19558 let b = _mm256_set1_pch(0.0, 2.0);
19559 let c = _mm256_set1_pch(0.0, 3.0);
19560 let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
19561 let e = _mm256_setr_ph(
19562 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19563 );
19564 assert_eq_m256h(r, e);
19565 }
19566
19567 #[simd_test(enable = "avx512fp16,avx512vl")]
19568 unsafe fn test_mm256_mask3_fmadd_pch() {
19569 let a = _mm256_set1_pch(0.0, 1.0);
19570 let b = _mm256_set1_pch(0.0, 2.0);
19571 let c = _mm256_set1_pch(0.0, 3.0);
19572 let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
19573 let e = _mm256_setr_ph(
19574 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19575 );
19576 assert_eq_m256h(r, e);
19577 }
19578
19579 #[simd_test(enable = "avx512fp16,avx512vl")]
19580 unsafe fn test_mm256_maskz_fmadd_pch() {
19581 let a = _mm256_set1_pch(0.0, 1.0);
19582 let b = _mm256_set1_pch(0.0, 2.0);
19583 let c = _mm256_set1_pch(0.0, 3.0);
19584 let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
19585 let e = _mm256_setr_ph(
19586 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19587 );
19588 assert_eq_m256h(r, e);
19589 }
19590
19591 #[simd_test(enable = "avx512fp16")]
19592 unsafe fn test_mm512_fmadd_pch() {
19593 let a = _mm512_set1_pch(0.0, 1.0);
19594 let b = _mm512_set1_pch(0.0, 2.0);
19595 let c = _mm512_set1_pch(0.0, 3.0);
19596 let r = _mm512_fmadd_pch(a, b, c);
19597 let e = _mm512_set1_pch(-2.0, 3.0);
19598 assert_eq_m512h(r, e);
19599 }
19600
19601 #[simd_test(enable = "avx512fp16")]
19602 unsafe fn test_mm512_mask_fmadd_pch() {
19603 let a = _mm512_set1_pch(0.0, 1.0);
19604 let b = _mm512_set1_pch(0.0, 2.0);
19605 let c = _mm512_set1_pch(0.0, 3.0);
19606 let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
19607 let e = _mm512_setr_ph(
19608 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19609 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19610 );
19611 assert_eq_m512h(r, e);
19612 }
19613
19614 #[simd_test(enable = "avx512fp16")]
19615 unsafe fn test_mm512_mask3_fmadd_pch() {
19616 let a = _mm512_set1_pch(0.0, 1.0);
19617 let b = _mm512_set1_pch(0.0, 2.0);
19618 let c = _mm512_set1_pch(0.0, 3.0);
19619 let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
19620 let e = _mm512_setr_ph(
19621 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19622 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19623 );
19624 assert_eq_m512h(r, e);
19625 }
19626
19627 #[simd_test(enable = "avx512fp16")]
19628 unsafe fn test_mm512_maskz_fmadd_pch() {
19629 let a = _mm512_set1_pch(0.0, 1.0);
19630 let b = _mm512_set1_pch(0.0, 2.0);
19631 let c = _mm512_set1_pch(0.0, 3.0);
19632 let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
19633 let e = _mm512_setr_ph(
19634 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19635 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19636 );
19637 assert_eq_m512h(r, e);
19638 }
19639
19640 #[simd_test(enable = "avx512fp16")]
19641 unsafe fn test_mm512_fmadd_round_pch() {
19642 let a = _mm512_set1_pch(0.0, 1.0);
19643 let b = _mm512_set1_pch(0.0, 2.0);
19644 let c = _mm512_set1_pch(0.0, 3.0);
19645 let r =
19646 _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19647 let e = _mm512_set1_pch(-2.0, 3.0);
19648 assert_eq_m512h(r, e);
19649 }
19650
19651 #[simd_test(enable = "avx512fp16")]
19652 unsafe fn test_mm512_mask_fmadd_round_pch() {
19653 let a = _mm512_set1_pch(0.0, 1.0);
19654 let b = _mm512_set1_pch(0.0, 2.0);
19655 let c = _mm512_set1_pch(0.0, 3.0);
19656 let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19657 a,
19658 0b0101010101010101,
19659 b,
19660 c,
19661 );
19662 let e = _mm512_setr_ph(
19663 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19664 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19665 );
19666 assert_eq_m512h(r, e);
19667 }
19668
19669 #[simd_test(enable = "avx512fp16")]
19670 unsafe fn test_mm512_mask3_fmadd_round_pch() {
19671 let a = _mm512_set1_pch(0.0, 1.0);
19672 let b = _mm512_set1_pch(0.0, 2.0);
19673 let c = _mm512_set1_pch(0.0, 3.0);
19674 let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19675 a,
19676 b,
19677 c,
19678 0b0101010101010101,
19679 );
19680 let e = _mm512_setr_ph(
19681 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19682 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19683 );
19684 assert_eq_m512h(r, e);
19685 }
19686
19687 #[simd_test(enable = "avx512fp16")]
19688 unsafe fn test_mm512_maskz_fmadd_round_pch() {
19689 let a = _mm512_set1_pch(0.0, 1.0);
19690 let b = _mm512_set1_pch(0.0, 2.0);
19691 let c = _mm512_set1_pch(0.0, 3.0);
19692 let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19693 0b0101010101010101,
19694 a,
19695 b,
19696 c,
19697 );
19698 let e = _mm512_setr_ph(
19699 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19700 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19701 );
19702 assert_eq_m512h(r, e);
19703 }
19704
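    // Scalar complex FMA (`_sch`): only the lowest pair is computed (-2 + 3i here); the
    // upper lanes are copied from `a`, except for `mask3`, which copies them from `c`.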
19705 #[simd_test(enable = "avx512fp16")]
19706 unsafe fn test_mm_fmadd_sch() {
19707 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19708 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19709 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19710 let r = _mm_fmadd_sch(a, b, c);
19711 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19712 assert_eq_m128h(r, e);
19713 }
19714
19715 #[simd_test(enable = "avx512fp16")]
19716 unsafe fn test_mm_mask_fmadd_sch() {
19717 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19718 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19719 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19720 let r = _mm_mask_fmadd_sch(a, 0, b, c);
19721 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19722 assert_eq_m128h(r, e);
19723 let r = _mm_mask_fmadd_sch(a, 1, b, c);
19724 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19725 assert_eq_m128h(r, e);
19726 }
19727
19728 #[simd_test(enable = "avx512fp16")]
19729 unsafe fn test_mm_mask3_fmadd_sch() {
19730 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19731 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19732 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19733 let r = _mm_mask3_fmadd_sch(a, b, c, 0);
19734 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19735 assert_eq_m128h(r, e);
19736 let r = _mm_mask3_fmadd_sch(a, b, c, 1);
19737 let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19738 assert_eq_m128h(r, e);
19739 }
19740
19741 #[simd_test(enable = "avx512fp16")]
19742 unsafe fn test_mm_maskz_fmadd_sch() {
19743 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19744 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19745 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19746 let r = _mm_maskz_fmadd_sch(0, a, b, c);
19747 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19748 assert_eq_m128h(r, e);
19749 let r = _mm_maskz_fmadd_sch(1, a, b, c);
19750 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19751 assert_eq_m128h(r, e);
19752 }
19753
19754 #[simd_test(enable = "avx512fp16")]
19755 unsafe fn test_mm_fmadd_round_sch() {
19756 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19757 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19758 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19759 let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19760 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19761 assert_eq_m128h(r, e);
19762 }
19763
19764 #[simd_test(enable = "avx512fp16")]
19765 unsafe fn test_mm_mask_fmadd_round_sch() {
19766 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19767 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19768 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19769 let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19770 a, 0, b, c,
19771 );
19772 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19773 assert_eq_m128h(r, e);
19774 let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19775 a, 1, b, c,
19776 );
19777 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19778 assert_eq_m128h(r, e);
19779 }
19780
19781 #[simd_test(enable = "avx512fp16")]
19782 unsafe fn test_mm_mask3_fmadd_round_sch() {
19783 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19784 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19785 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19786 let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19787 a, b, c, 0,
19788 );
19789 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19790 assert_eq_m128h(r, e);
19791 let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19792 a, b, c, 1,
19793 );
19794 let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19795 assert_eq_m128h(r, e);
19796 }
19797
19798 #[simd_test(enable = "avx512fp16")]
19799 unsafe fn test_mm_maskz_fmadd_round_sch() {
19800 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19801 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19802 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19803 let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19804 0, a, b, c,
19805 );
19806 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19807 assert_eq_m128h(r, e);
19808 let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19809 1, a, b, c,
19810 );
19811 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19812 assert_eq_m128h(r, e);
19813 }
19814
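    // `_fcmadd_pch` conjugates one multiplicand (documented as `b`) before the
    // multiply-add: (0 + 1i) * conj(0 + 2i) + (0 + 3i) = 2 + 3i, which matches the
    // expected (2.0, 3.0) pairs. Mask handling mirrors the `fmadd_pch` tests above.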
19815 #[simd_test(enable = "avx512fp16,avx512vl")]
19816 unsafe fn test_mm_fcmadd_pch() {
19817 let a = _mm_set1_pch(0.0, 1.0);
19818 let b = _mm_set1_pch(0.0, 2.0);
19819 let c = _mm_set1_pch(0.0, 3.0);
19820 let r = _mm_fcmadd_pch(a, b, c);
19821 let e = _mm_set1_pch(2.0, 3.0);
19822 assert_eq_m128h(r, e);
19823 }
19824
19825 #[simd_test(enable = "avx512fp16,avx512vl")]
19826 unsafe fn test_mm_mask_fcmadd_pch() {
19827 let a = _mm_set1_pch(0.0, 1.0);
19828 let b = _mm_set1_pch(0.0, 2.0);
19829 let c = _mm_set1_pch(0.0, 3.0);
19830 let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
19831 let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
19832 assert_eq_m128h(r, e);
19833 }
19834
19835 #[simd_test(enable = "avx512fp16,avx512vl")]
19836 unsafe fn test_mm_mask3_fcmadd_pch() {
19837 let a = _mm_set1_pch(0.0, 1.0);
19838 let b = _mm_set1_pch(0.0, 2.0);
19839 let c = _mm_set1_pch(0.0, 3.0);
19840 let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
19841 let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
19842 assert_eq_m128h(r, e);
19843 }
19844
19845 #[simd_test(enable = "avx512fp16,avx512vl")]
19846 unsafe fn test_mm_maskz_fcmadd_pch() {
19847 let a = _mm_set1_pch(0.0, 1.0);
19848 let b = _mm_set1_pch(0.0, 2.0);
19849 let c = _mm_set1_pch(0.0, 3.0);
19850 let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
19851 let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
19852 assert_eq_m128h(r, e);
19853 }
19854
19855 #[simd_test(enable = "avx512fp16,avx512vl")]
19856 unsafe fn test_mm256_fcmadd_pch() {
19857 let a = _mm256_set1_pch(0.0, 1.0);
19858 let b = _mm256_set1_pch(0.0, 2.0);
19859 let c = _mm256_set1_pch(0.0, 3.0);
19860 let r = _mm256_fcmadd_pch(a, b, c);
19861 let e = _mm256_set1_pch(2.0, 3.0);
19862 assert_eq_m256h(r, e);
19863 }
19864
19865 #[simd_test(enable = "avx512fp16,avx512vl")]
19866 unsafe fn test_mm256_mask_fcmadd_pch() {
19867 let a = _mm256_set1_pch(0.0, 1.0);
19868 let b = _mm256_set1_pch(0.0, 2.0);
19869 let c = _mm256_set1_pch(0.0, 3.0);
19870 let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
19871 let e = _mm256_setr_ph(
19872 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
19873 );
19874 assert_eq_m256h(r, e);
19875 }
19876
19877 #[simd_test(enable = "avx512fp16,avx512vl")]
19878 unsafe fn test_mm256_mask3_fcmadd_pch() {
19879 let a = _mm256_set1_pch(0.0, 1.0);
19880 let b = _mm256_set1_pch(0.0, 2.0);
19881 let c = _mm256_set1_pch(0.0, 3.0);
19882 let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
19883 let e = _mm256_setr_ph(
19884 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
19885 );
19886 assert_eq_m256h(r, e);
19887 }
19888
19889 #[simd_test(enable = "avx512fp16,avx512vl")]
19890 unsafe fn test_mm256_maskz_fcmadd_pch() {
19891 let a = _mm256_set1_pch(0.0, 1.0);
19892 let b = _mm256_set1_pch(0.0, 2.0);
19893 let c = _mm256_set1_pch(0.0, 3.0);
19894 let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
19895 let e = _mm256_setr_ph(
19896 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
19897 );
19898 assert_eq_m256h(r, e);
19899 }
19900
19901 #[simd_test(enable = "avx512fp16")]
19902 unsafe fn test_mm512_fcmadd_pch() {
19903 let a = _mm512_set1_pch(0.0, 1.0);
19904 let b = _mm512_set1_pch(0.0, 2.0);
19905 let c = _mm512_set1_pch(0.0, 3.0);
19906 let r = _mm512_fcmadd_pch(a, b, c);
19907 let e = _mm512_set1_pch(2.0, 3.0);
19908 assert_eq_m512h(r, e);
19909 }
19910
19911 #[simd_test(enable = "avx512fp16")]
19912 unsafe fn test_mm512_mask_fcmadd_pch() {
19913 let a = _mm512_set1_pch(0.0, 1.0);
19914 let b = _mm512_set1_pch(0.0, 2.0);
19915 let c = _mm512_set1_pch(0.0, 3.0);
19916 let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
19917 let e = _mm512_setr_ph(
19918 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
19919 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
19920 );
19921 assert_eq_m512h(r, e);
19922 }
19923
19924 #[simd_test(enable = "avx512fp16")]
19925 unsafe fn test_mm512_mask3_fcmadd_pch() {
19926 let a = _mm512_set1_pch(0.0, 1.0);
19927 let b = _mm512_set1_pch(0.0, 2.0);
19928 let c = _mm512_set1_pch(0.0, 3.0);
19929 let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
19930 let e = _mm512_setr_ph(
19931 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
19932 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
19933 );
19934 assert_eq_m512h(r, e);
19935 }
19936
19937 #[simd_test(enable = "avx512fp16")]
19938 unsafe fn test_mm512_maskz_fcmadd_pch() {
19939 let a = _mm512_set1_pch(0.0, 1.0);
19940 let b = _mm512_set1_pch(0.0, 2.0);
19941 let c = _mm512_set1_pch(0.0, 3.0);
19942 let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
19943 let e = _mm512_setr_ph(
19944 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
19945 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
19946 );
19947 assert_eq_m512h(r, e);
19948 }
19949
19950 #[simd_test(enable = "avx512fp16")]
19951 unsafe fn test_mm512_fcmadd_round_pch() {
19952 let a = _mm512_set1_pch(0.0, 1.0);
19953 let b = _mm512_set1_pch(0.0, 2.0);
19954 let c = _mm512_set1_pch(0.0, 3.0);
19955 let r =
19956 _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19957 let e = _mm512_set1_pch(2.0, 3.0);
19958 assert_eq_m512h(r, e);
19959 }
19960
19961 #[simd_test(enable = "avx512fp16")]
19962 unsafe fn test_mm512_mask_fcmadd_round_pch() {
19963 let a = _mm512_set1_pch(0.0, 1.0);
19964 let b = _mm512_set1_pch(0.0, 2.0);
19965 let c = _mm512_set1_pch(0.0, 3.0);
19966 let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19967 a,
19968 0b0101010101010101,
19969 b,
19970 c,
19971 );
19972 let e = _mm512_setr_ph(
19973 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
19974 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
19975 );
19976 assert_eq_m512h(r, e);
19977 }
19978
19979 #[simd_test(enable = "avx512fp16")]
19980 unsafe fn test_mm512_mask3_fcmadd_round_pch() {
19981 let a = _mm512_set1_pch(0.0, 1.0);
19982 let b = _mm512_set1_pch(0.0, 2.0);
19983 let c = _mm512_set1_pch(0.0, 3.0);
19984 let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19985 a,
19986 b,
19987 c,
19988 0b0101010101010101,
19989 );
19990 let e = _mm512_setr_ph(
19991 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
19992 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
19993 );
19994 assert_eq_m512h(r, e);
19995 }
19996
19997 #[simd_test(enable = "avx512fp16")]
19998 unsafe fn test_mm512_maskz_fcmadd_round_pch() {
19999 let a = _mm512_set1_pch(0.0, 1.0);
20000 let b = _mm512_set1_pch(0.0, 2.0);
20001 let c = _mm512_set1_pch(0.0, 3.0);
20002 let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20003 0b0101010101010101,
20004 a,
20005 b,
20006 c,
20007 );
20008 let e = _mm512_setr_ph(
20009 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
20010 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
20011 );
20012 assert_eq_m512h(r, e);
20013 }
20014
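    // Scalar conjugate FMA (`_sch`) mirrors the `fmadd_sch` tests, with the conjugate
    // product 2 + 3i in the low pair and upper lanes taken from `a` (or `c` for `mask3`).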
20015 #[simd_test(enable = "avx512fp16")]
20016 unsafe fn test_mm_fcmadd_sch() {
20017 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20018 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20019 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20020 let r = _mm_fcmadd_sch(a, b, c);
20021 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20022 assert_eq_m128h(r, e);
20023 }
20024
20025 #[simd_test(enable = "avx512fp16")]
20026 unsafe fn test_mm_mask_fcmadd_sch() {
20027 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20028 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20029 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20030 let r = _mm_mask_fcmadd_sch(a, 0, b, c);
20031 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20032 assert_eq_m128h(r, e);
20033 let r = _mm_mask_fcmadd_sch(a, 1, b, c);
20034 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20035 assert_eq_m128h(r, e);
20036 }
20037
20038 #[simd_test(enable = "avx512fp16")]
20039 unsafe fn test_mm_mask3_fcmadd_sch() {
20040 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20041 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20042 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20043 let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
20044 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20045 assert_eq_m128h(r, e);
20046 let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
20047 let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20048 assert_eq_m128h(r, e);
20049 }
20050
20051 #[simd_test(enable = "avx512fp16")]
20052 unsafe fn test_mm_maskz_fcmadd_sch() {
20053 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20054 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20055 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20056 let r = _mm_maskz_fcmadd_sch(0, a, b, c);
20057 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20058 assert_eq_m128h(r, e);
20059 let r = _mm_maskz_fcmadd_sch(1, a, b, c);
20060 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20061 assert_eq_m128h(r, e);
20062 }
20063
20064 #[simd_test(enable = "avx512fp16")]
20065 unsafe fn test_mm_fcmadd_round_sch() {
20066 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20067 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20068 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20069 let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20070 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20071 assert_eq_m128h(r, e);
20072 }
20073
20074 #[simd_test(enable = "avx512fp16")]
20075 unsafe fn test_mm_mask_fcmadd_round_sch() {
20076 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20077 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20078 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20079 let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20080 a, 0, b, c,
20081 );
20082 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20083 assert_eq_m128h(r, e);
20084 let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20085 a, 1, b, c,
20086 );
20087 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20088 assert_eq_m128h(r, e);
20089 }
20090
20091 #[simd_test(enable = "avx512fp16")]
20092 unsafe fn test_mm_mask3_fcmadd_round_sch() {
20093 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20094 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20095 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20096 let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20097 a, b, c, 0,
20098 );
20099 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20100 assert_eq_m128h(r, e);
20101 let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20102 a, b, c, 1,
20103 );
20104 let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20105 assert_eq_m128h(r, e);
20106 }
20107
20108 #[simd_test(enable = "avx512fp16")]
20109 unsafe fn test_mm_maskz_fcmadd_round_sch() {
20110 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20111 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20112 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20113 let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20114 0, a, b, c,
20115 );
20116 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20117 assert_eq_m128h(r, e);
20118 let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20119 1, a, b, c,
20120 );
20121 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20122 assert_eq_m128h(r, e);
20123 }
20124
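    // `_fmadd_ph` is the ordinary (non-complex) FMA on every f16 lane: 1.0 * 2.0 + 3.0
    // = 5.0. Here each mask bit selects a single lane rather than a complex pair.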
20125 #[simd_test(enable = "avx512fp16,avx512vl")]
20126 unsafe fn test_mm_fmadd_ph() {
20127 let a = _mm_set1_ph(1.0);
20128 let b = _mm_set1_ph(2.0);
20129 let c = _mm_set1_ph(3.0);
20130 let r = _mm_fmadd_ph(a, b, c);
20131 let e = _mm_set1_ph(5.0);
20132 assert_eq_m128h(r, e);
20133 }
20134
20135 #[simd_test(enable = "avx512fp16,avx512vl")]
20136 unsafe fn test_mm_mask_fmadd_ph() {
20137 let a = _mm_set1_ph(1.0);
20138 let b = _mm_set1_ph(2.0);
20139 let c = _mm_set1_ph(3.0);
20140 let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
20141 let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
20142 assert_eq_m128h(r, e);
20143 }
20144
20145 #[simd_test(enable = "avx512fp16,avx512vl")]
20146 unsafe fn test_mm_mask3_fmadd_ph() {
20147 let a = _mm_set1_ph(1.0);
20148 let b = _mm_set1_ph(2.0);
20149 let c = _mm_set1_ph(3.0);
20150 let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
20151 let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
20152 assert_eq_m128h(r, e);
20153 }
20154
20155 #[simd_test(enable = "avx512fp16,avx512vl")]
20156 unsafe fn test_mm_maskz_fmadd_ph() {
20157 let a = _mm_set1_ph(1.0);
20158 let b = _mm_set1_ph(2.0);
20159 let c = _mm_set1_ph(3.0);
20160 let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
20161 let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
20162 assert_eq_m128h(r, e);
20163 }
20164
20165 #[simd_test(enable = "avx512fp16,avx512vl")]
20166 unsafe fn test_mm256_fmadd_ph() {
20167 let a = _mm256_set1_ph(1.0);
20168 let b = _mm256_set1_ph(2.0);
20169 let c = _mm256_set1_ph(3.0);
20170 let r = _mm256_fmadd_ph(a, b, c);
20171 let e = _mm256_set1_ph(5.0);
20172 assert_eq_m256h(r, e);
20173 }
20174
20175 #[simd_test(enable = "avx512fp16,avx512vl")]
20176 unsafe fn test_mm256_mask_fmadd_ph() {
20177 let a = _mm256_set1_ph(1.0);
20178 let b = _mm256_set1_ph(2.0);
20179 let c = _mm256_set1_ph(3.0);
20180 let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
20181 let e = _mm256_set_ph(
20182 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20183 );
20184 assert_eq_m256h(r, e);
20185 }
20186
20187 #[simd_test(enable = "avx512fp16,avx512vl")]
20188 unsafe fn test_mm256_mask3_fmadd_ph() {
20189 let a = _mm256_set1_ph(1.0);
20190 let b = _mm256_set1_ph(2.0);
20191 let c = _mm256_set1_ph(3.0);
20192 let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
20193 let e = _mm256_set_ph(
20194 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20195 );
20196 assert_eq_m256h(r, e);
20197 }
20198
20199 #[simd_test(enable = "avx512fp16,avx512vl")]
20200 unsafe fn test_mm256_maskz_fmadd_ph() {
20201 let a = _mm256_set1_ph(1.0);
20202 let b = _mm256_set1_ph(2.0);
20203 let c = _mm256_set1_ph(3.0);
20204 let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
20205 let e = _mm256_set_ph(
20206 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20207 );
20208 assert_eq_m256h(r, e);
20209 }
20210
20211 #[simd_test(enable = "avx512fp16")]
20212 unsafe fn test_mm512_fmadd_ph() {
20213 let a = _mm512_set1_ph(1.0);
20214 let b = _mm512_set1_ph(2.0);
20215 let c = _mm512_set1_ph(3.0);
20216 let r = _mm512_fmadd_ph(a, b, c);
20217 let e = _mm512_set1_ph(5.0);
20218 assert_eq_m512h(r, e);
20219 }
20220
20221 #[simd_test(enable = "avx512fp16")]
20222 unsafe fn test_mm512_mask_fmadd_ph() {
20223 let a = _mm512_set1_ph(1.0);
20224 let b = _mm512_set1_ph(2.0);
20225 let c = _mm512_set1_ph(3.0);
20226 let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
20227 let e = _mm512_set_ph(
20228 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
20229 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20230 );
20231 assert_eq_m512h(r, e);
20232 }
20233
20234 #[simd_test(enable = "avx512fp16")]
20235 unsafe fn test_mm512_mask3_fmadd_ph() {
20236 let a = _mm512_set1_ph(1.0);
20237 let b = _mm512_set1_ph(2.0);
20238 let c = _mm512_set1_ph(3.0);
20239 let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
20240 let e = _mm512_set_ph(
20241 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
20242 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20243 );
20244 assert_eq_m512h(r, e);
20245 }
20246
20247 #[simd_test(enable = "avx512fp16")]
20248 unsafe fn test_mm512_maskz_fmadd_ph() {
20249 let a = _mm512_set1_ph(1.0);
20250 let b = _mm512_set1_ph(2.0);
20251 let c = _mm512_set1_ph(3.0);
20252 let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
20253 let e = _mm512_set_ph(
20254 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
20255 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20256 );
20257 assert_eq_m512h(r, e);
20258 }
20259
20260 #[simd_test(enable = "avx512fp16")]
20261 unsafe fn test_mm512_fmadd_round_ph() {
20262 let a = _mm512_set1_ph(1.0);
20263 let b = _mm512_set1_ph(2.0);
20264 let c = _mm512_set1_ph(3.0);
20265 let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20266 let e = _mm512_set1_ph(5.0);
20267 assert_eq_m512h(r, e);
20268 }
20269
20270 #[simd_test(enable = "avx512fp16")]
20271 unsafe fn test_mm512_mask_fmadd_round_ph() {
20272 let a = _mm512_set1_ph(1.0);
20273 let b = _mm512_set1_ph(2.0);
20274 let c = _mm512_set1_ph(3.0);
20275 let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20276 a,
20277 0b01010101010101010101010101010101,
20278 b,
20279 c,
20280 );
20281 let e = _mm512_set_ph(
20282 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
20283 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20284 );
20285 assert_eq_m512h(r, e);
20286 }
20287
20288 #[simd_test(enable = "avx512fp16")]
20289 unsafe fn test_mm512_mask3_fmadd_round_ph() {
20290 let a = _mm512_set1_ph(1.0);
20291 let b = _mm512_set1_ph(2.0);
20292 let c = _mm512_set1_ph(3.0);
20293 let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20294 a,
20295 b,
20296 c,
20297 0b01010101010101010101010101010101,
20298 );
20299 let e = _mm512_set_ph(
20300 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
20301 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20302 );
20303 assert_eq_m512h(r, e);
20304 }
20305
20306 #[simd_test(enable = "avx512fp16")]
20307 unsafe fn test_mm512_maskz_fmadd_round_ph() {
20308 let a = _mm512_set1_ph(1.0);
20309 let b = _mm512_set1_ph(2.0);
20310 let c = _mm512_set1_ph(3.0);
20311 let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20312 0b01010101010101010101010101010101,
20313 a,
20314 b,
20315 c,
20316 );
20317 let e = _mm512_set_ph(
20318 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
20319 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20320 );
20321 assert_eq_m512h(r, e);
20322 }
20323
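// The _sh variants operate on lane 0 only: lanes 1..=7 of the result are copied from a
// (or from c for the mask3 form), which is why the upper values of e track a (or c) below.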
20324 #[simd_test(enable = "avx512fp16")]
20325 unsafe fn test_mm_fmadd_sh() {
20326 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20327 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20328 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20329 let r = _mm_fmadd_sh(a, b, c);
20330 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20331 assert_eq_m128h(r, e);
20332 }
20333
20334 #[simd_test(enable = "avx512fp16")]
20335 unsafe fn test_mm_mask_fmadd_sh() {
20336 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20337 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20338 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20339 let r = _mm_mask_fmadd_sh(a, 0, b, c);
20340 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20341 assert_eq_m128h(r, e);
20342 let r = _mm_mask_fmadd_sh(a, 1, b, c);
20343 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20344 assert_eq_m128h(r, e);
20345 }
20346
20347 #[simd_test(enable = "avx512fp16")]
20348 unsafe fn test_mm_mask3_fmadd_sh() {
20349 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20350 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20351 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20352 let r = _mm_mask3_fmadd_sh(a, b, c, 0);
20353 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20354 assert_eq_m128h(r, e);
20355 let r = _mm_mask3_fmadd_sh(a, b, c, 1);
20356 let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
20357 assert_eq_m128h(r, e);
20358 }
20359
20360 #[simd_test(enable = "avx512fp16")]
20361 unsafe fn test_mm_maskz_fmadd_sh() {
20362 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20363 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20364 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20365 let r = _mm_maskz_fmadd_sh(0, a, b, c);
20366 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20367 assert_eq_m128h(r, e);
20368 let r = _mm_maskz_fmadd_sh(1, a, b, c);
20369 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20370 assert_eq_m128h(r, e);
20371 }
20372
20373 #[simd_test(enable = "avx512fp16")]
20374 unsafe fn test_mm_fmadd_round_sh() {
20375 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20376 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20377 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20378 let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20379 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20380 assert_eq_m128h(r, e);
20381 }
20382
20383 #[simd_test(enable = "avx512fp16")]
20384 unsafe fn test_mm_mask_fmadd_round_sh() {
20385 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20386 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20387 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20388 let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20389 a, 0, b, c,
20390 );
20391 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20392 assert_eq_m128h(r, e);
20393 let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20394 a, 1, b, c,
20395 );
20396 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20397 assert_eq_m128h(r, e);
20398 }
20399
20400 #[simd_test(enable = "avx512fp16")]
20401 unsafe fn test_mm_mask3_fmadd_round_sh() {
20402 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20403 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20404 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20405 let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20406 a, b, c, 0,
20407 );
20408 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20409 assert_eq_m128h(r, e);
20410 let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20411 a, b, c, 1,
20412 );
20413 let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
20414 assert_eq_m128h(r, e);
20415 }
20416
20417 #[simd_test(enable = "avx512fp16")]
20418 unsafe fn test_mm_maskz_fmadd_round_sh() {
20419 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20420 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20421 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20422 let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20423 0, a, b, c,
20424 );
20425 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20426 assert_eq_m128h(r, e);
20427 let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20428 1, a, b, c,
20429 );
20430 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20431 assert_eq_m128h(r, e);
20432 }
20433
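// fmsub computes a * b - c per lane: 1.0 * 2.0 - 3.0 == -1.0. Masking behaves exactly as in
// the fmadd tests above.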
20434 #[simd_test(enable = "avx512fp16,avx512vl")]
20435 unsafe fn test_mm_fmsub_ph() {
20436 let a = _mm_set1_ph(1.0);
20437 let b = _mm_set1_ph(2.0);
20438 let c = _mm_set1_ph(3.0);
20439 let r = _mm_fmsub_ph(a, b, c);
20440 let e = _mm_set1_ph(-1.0);
20441 assert_eq_m128h(r, e);
20442 }
20443
20444 #[simd_test(enable = "avx512fp16,avx512vl")]
20445 unsafe fn test_mm_mask_fmsub_ph() {
20446 let a = _mm_set1_ph(1.0);
20447 let b = _mm_set1_ph(2.0);
20448 let c = _mm_set1_ph(3.0);
20449 let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
20450 let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
20451 assert_eq_m128h(r, e);
20452 }
20453
20454 #[simd_test(enable = "avx512fp16,avx512vl")]
20455 unsafe fn test_mm_mask3_fmsub_ph() {
20456 let a = _mm_set1_ph(1.0);
20457 let b = _mm_set1_ph(2.0);
20458 let c = _mm_set1_ph(3.0);
20459 let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
20460 let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
20461 assert_eq_m128h(r, e);
20462 }
20463
20464 #[simd_test(enable = "avx512fp16,avx512vl")]
20465 unsafe fn test_mm_maskz_fmsub_ph() {
20466 let a = _mm_set1_ph(1.0);
20467 let b = _mm_set1_ph(2.0);
20468 let c = _mm_set1_ph(3.0);
20469 let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
20470 let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
20471 assert_eq_m128h(r, e);
20472 }
20473
20474 #[simd_test(enable = "avx512fp16,avx512vl")]
20475 unsafe fn test_mm256_fmsub_ph() {
20476 let a = _mm256_set1_ph(1.0);
20477 let b = _mm256_set1_ph(2.0);
20478 let c = _mm256_set1_ph(3.0);
20479 let r = _mm256_fmsub_ph(a, b, c);
20480 let e = _mm256_set1_ph(-1.0);
20481 assert_eq_m256h(r, e);
20482 }
20483
20484 #[simd_test(enable = "avx512fp16,avx512vl")]
20485 unsafe fn test_mm256_mask_fmsub_ph() {
20486 let a = _mm256_set1_ph(1.0);
20487 let b = _mm256_set1_ph(2.0);
20488 let c = _mm256_set1_ph(3.0);
20489 let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
20490 let e = _mm256_set_ph(
20491 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20492 );
20493 assert_eq_m256h(r, e);
20494 }
20495
20496 #[simd_test(enable = "avx512fp16,avx512vl")]
20497 unsafe fn test_mm256_mask3_fmsub_ph() {
20498 let a = _mm256_set1_ph(1.0);
20499 let b = _mm256_set1_ph(2.0);
20500 let c = _mm256_set1_ph(3.0);
20501 let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
20502 let e = _mm256_set_ph(
20503 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20504 );
20505 assert_eq_m256h(r, e);
20506 }
20507
20508 #[simd_test(enable = "avx512fp16,avx512vl")]
20509 unsafe fn test_mm256_maskz_fmsub_ph() {
20510 let a = _mm256_set1_ph(1.0);
20511 let b = _mm256_set1_ph(2.0);
20512 let c = _mm256_set1_ph(3.0);
20513 let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
20514 let e = _mm256_set_ph(
20515 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20516 );
20517 assert_eq_m256h(r, e);
20518 }
20519
20520 #[simd_test(enable = "avx512fp16")]
20521 unsafe fn test_mm512_fmsub_ph() {
20522 let a = _mm512_set1_ph(1.0);
20523 let b = _mm512_set1_ph(2.0);
20524 let c = _mm512_set1_ph(3.0);
20525 let r = _mm512_fmsub_ph(a, b, c);
20526 let e = _mm512_set1_ph(-1.0);
20527 assert_eq_m512h(r, e);
20528 }
20529
20530 #[simd_test(enable = "avx512fp16")]
20531 unsafe fn test_mm512_mask_fmsub_ph() {
20532 let a = _mm512_set1_ph(1.0);
20533 let b = _mm512_set1_ph(2.0);
20534 let c = _mm512_set1_ph(3.0);
20535 let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
20536 let e = _mm512_set_ph(
20537 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20538 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20539 );
20540 assert_eq_m512h(r, e);
20541 }
20542
20543 #[simd_test(enable = "avx512fp16")]
20544 unsafe fn test_mm512_mask3_fmsub_ph() {
20545 let a = _mm512_set1_ph(1.0);
20546 let b = _mm512_set1_ph(2.0);
20547 let c = _mm512_set1_ph(3.0);
20548 let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
20549 let e = _mm512_set_ph(
20550 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20551 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20552 );
20553 assert_eq_m512h(r, e);
20554 }
20555
20556 #[simd_test(enable = "avx512fp16")]
20557 unsafe fn test_mm512_maskz_fmsub_ph() {
20558 let a = _mm512_set1_ph(1.0);
20559 let b = _mm512_set1_ph(2.0);
20560 let c = _mm512_set1_ph(3.0);
20561 let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
20562 let e = _mm512_set_ph(
20563 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20564 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20565 );
20566 assert_eq_m512h(r, e);
20567 }
20568
20569 #[simd_test(enable = "avx512fp16")]
20570 unsafe fn test_mm512_fmsub_round_ph() {
20571 let a = _mm512_set1_ph(1.0);
20572 let b = _mm512_set1_ph(2.0);
20573 let c = _mm512_set1_ph(3.0);
20574 let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20575 let e = _mm512_set1_ph(-1.0);
20576 assert_eq_m512h(r, e);
20577 }
20578
20579 #[simd_test(enable = "avx512fp16")]
20580 unsafe fn test_mm512_mask_fmsub_round_ph() {
20581 let a = _mm512_set1_ph(1.0);
20582 let b = _mm512_set1_ph(2.0);
20583 let c = _mm512_set1_ph(3.0);
20584 let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20585 a,
20586 0b01010101010101010101010101010101,
20587 b,
20588 c,
20589 );
20590 let e = _mm512_set_ph(
20591 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20592 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20593 );
20594 assert_eq_m512h(r, e);
20595 }
20596
20597 #[simd_test(enable = "avx512fp16")]
20598 unsafe fn test_mm512_mask3_fmsub_round_ph() {
20599 let a = _mm512_set1_ph(1.0);
20600 let b = _mm512_set1_ph(2.0);
20601 let c = _mm512_set1_ph(3.0);
20602 let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20603 a,
20604 b,
20605 c,
20606 0b01010101010101010101010101010101,
20607 );
20608 let e = _mm512_set_ph(
20609 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20610 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20611 );
20612 assert_eq_m512h(r, e);
20613 }
20614
20615 #[simd_test(enable = "avx512fp16")]
20616 unsafe fn test_mm512_maskz_fmsub_round_ph() {
20617 let a = _mm512_set1_ph(1.0);
20618 let b = _mm512_set1_ph(2.0);
20619 let c = _mm512_set1_ph(3.0);
20620 let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20621 0b01010101010101010101010101010101,
20622 a,
20623 b,
20624 c,
20625 );
20626 let e = _mm512_set_ph(
20627 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20628 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20629 );
20630 assert_eq_m512h(r, e);
20631 }
20632
20633 #[simd_test(enable = "avx512fp16")]
20634 unsafe fn test_mm_fmsub_sh() {
20635 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20636 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20637 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20638 let r = _mm_fmsub_sh(a, b, c);
20639 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20640 assert_eq_m128h(r, e);
20641 }
20642
20643 #[simd_test(enable = "avx512fp16")]
20644 unsafe fn test_mm_mask_fmsub_sh() {
20645 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20646 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20647 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20648 let r = _mm_mask_fmsub_sh(a, 0, b, c);
20649 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20650 assert_eq_m128h(r, e);
20651 let r = _mm_mask_fmsub_sh(a, 1, b, c);
20652 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20653 assert_eq_m128h(r, e);
20654 }
20655
20656 #[simd_test(enable = "avx512fp16")]
20657 unsafe fn test_mm_mask3_fmsub_sh() {
20658 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20659 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20660 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20661 let r = _mm_mask3_fmsub_sh(a, b, c, 0);
20662 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20663 assert_eq_m128h(r, e);
20664 let r = _mm_mask3_fmsub_sh(a, b, c, 1);
20665 let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
20666 assert_eq_m128h(r, e);
20667 }
20668
20669 #[simd_test(enable = "avx512fp16")]
20670 unsafe fn test_mm_maskz_fmsub_sh() {
20671 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20672 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20673 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20674 let r = _mm_maskz_fmsub_sh(0, a, b, c);
20675 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20676 assert_eq_m128h(r, e);
20677 let r = _mm_maskz_fmsub_sh(1, a, b, c);
20678 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20679 assert_eq_m128h(r, e);
20680 }
20681
20682 #[simd_test(enable = "avx512fp16")]
20683 unsafe fn test_mm_fmsub_round_sh() {
20684 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20685 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20686 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20687 let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20688 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20689 assert_eq_m128h(r, e);
20690 }
20691
20692 #[simd_test(enable = "avx512fp16")]
20693 unsafe fn test_mm_mask_fmsub_round_sh() {
20694 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20695 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20696 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20697 let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20698 a, 0, b, c,
20699 );
20700 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20701 assert_eq_m128h(r, e);
20702 let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20703 a, 1, b, c,
20704 );
20705 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20706 assert_eq_m128h(r, e);
20707 }
20708
20709 #[simd_test(enable = "avx512fp16")]
20710 unsafe fn test_mm_mask3_fmsub_round_sh() {
20711 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20712 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20713 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20714 let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20715 a, b, c, 0,
20716 );
20717 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20718 assert_eq_m128h(r, e);
20719 let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20720 a, b, c, 1,
20721 );
20722 let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
20723 assert_eq_m128h(r, e);
20724 }
20725
20726 #[simd_test(enable = "avx512fp16")]
20727 unsafe fn test_mm_maskz_fmsub_round_sh() {
20728 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20729 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20730 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20731 let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20732 0, a, b, c,
20733 );
20734 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20735 assert_eq_m128h(r, e);
20736 let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20737 1, a, b, c,
20738 );
20739 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20740 assert_eq_m128h(r, e);
20741 }
20742
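// fnmadd computes -(a * b) + c per lane: -(1.0 * 2.0) + 3.0 == 1.0.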
20743 #[simd_test(enable = "avx512fp16,avx512vl")]
20744 unsafe fn test_mm_fnmadd_ph() {
20745 let a = _mm_set1_ph(1.0);
20746 let b = _mm_set1_ph(2.0);
20747 let c = _mm_set1_ph(3.0);
20748 let r = _mm_fnmadd_ph(a, b, c);
20749 let e = _mm_set1_ph(1.0);
20750 assert_eq_m128h(r, e);
20751 }
20752
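// Since fnmadd(1.0, 2.0, 3.0) == 1.0 == a, the mask variant's expected vector is uniformly 1.0;
// only mask3 (3.0) and maskz (0.0) make the unselected lanes visible.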
20753 #[simd_test(enable = "avx512fp16,avx512vl")]
20754 unsafe fn test_mm_mask_fnmadd_ph() {
20755 let a = _mm_set1_ph(1.0);
20756 let b = _mm_set1_ph(2.0);
20757 let c = _mm_set1_ph(3.0);
20758 let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
20759 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
20760 assert_eq_m128h(r, e);
20761 }
20762
20763 #[simd_test(enable = "avx512fp16,avx512vl")]
20764 unsafe fn test_mm_mask3_fnmadd_ph() {
20765 let a = _mm_set1_ph(1.0);
20766 let b = _mm_set1_ph(2.0);
20767 let c = _mm_set1_ph(3.0);
20768 let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
20769 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
20770 assert_eq_m128h(r, e);
20771 }
20772
20773 #[simd_test(enable = "avx512fp16,avx512vl")]
20774 unsafe fn test_mm_maskz_fnmadd_ph() {
20775 let a = _mm_set1_ph(1.0);
20776 let b = _mm_set1_ph(2.0);
20777 let c = _mm_set1_ph(3.0);
20778 let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
20779 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
20780 assert_eq_m128h(r, e);
20781 }
20782
20783 #[simd_test(enable = "avx512fp16,avx512vl")]
20784 unsafe fn test_mm256_fnmadd_ph() {
20785 let a = _mm256_set1_ph(1.0);
20786 let b = _mm256_set1_ph(2.0);
20787 let c = _mm256_set1_ph(3.0);
20788 let r = _mm256_fnmadd_ph(a, b, c);
20789 let e = _mm256_set1_ph(1.0);
20790 assert_eq_m256h(r, e);
20791 }
20792
20793 #[simd_test(enable = "avx512fp16,avx512vl")]
20794 unsafe fn test_mm256_mask_fnmadd_ph() {
20795 let a = _mm256_set1_ph(1.0);
20796 let b = _mm256_set1_ph(2.0);
20797 let c = _mm256_set1_ph(3.0);
20798 let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
20799 let e = _mm256_set_ph(
20800 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20801 );
20802 assert_eq_m256h(r, e);
20803 }
20804
20805 #[simd_test(enable = "avx512fp16,avx512vl")]
20806 unsafe fn test_mm256_mask3_fnmadd_ph() {
20807 let a = _mm256_set1_ph(1.0);
20808 let b = _mm256_set1_ph(2.0);
20809 let c = _mm256_set1_ph(3.0);
20810 let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
20811 let e = _mm256_set_ph(
20812 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20813 );
20814 assert_eq_m256h(r, e);
20815 }
20816
20817 #[simd_test(enable = "avx512fp16,avx512vl")]
20818 unsafe fn test_mm256_maskz_fnmadd_ph() {
20819 let a = _mm256_set1_ph(1.0);
20820 let b = _mm256_set1_ph(2.0);
20821 let c = _mm256_set1_ph(3.0);
20822 let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
20823 let e = _mm256_set_ph(
20824 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20825 );
20826 assert_eq_m256h(r, e);
20827 }
20828
20829 #[simd_test(enable = "avx512fp16")]
20830 unsafe fn test_mm512_fnmadd_ph() {
20831 let a = _mm512_set1_ph(1.0);
20832 let b = _mm512_set1_ph(2.0);
20833 let c = _mm512_set1_ph(3.0);
20834 let r = _mm512_fnmadd_ph(a, b, c);
20835 let e = _mm512_set1_ph(1.0);
20836 assert_eq_m512h(r, e);
20837 }
20838
20839 #[simd_test(enable = "avx512fp16")]
20840 unsafe fn test_mm512_mask_fnmadd_ph() {
20841 let a = _mm512_set1_ph(1.0);
20842 let b = _mm512_set1_ph(2.0);
20843 let c = _mm512_set1_ph(3.0);
20844 let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
20845 let e = _mm512_set_ph(
20846 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20847 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20848 );
20849 assert_eq_m512h(r, e);
20850 }
20851
20852 #[simd_test(enable = "avx512fp16")]
20853 unsafe fn test_mm512_mask3_fnmadd_ph() {
20854 let a = _mm512_set1_ph(1.0);
20855 let b = _mm512_set1_ph(2.0);
20856 let c = _mm512_set1_ph(3.0);
20857 let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
20858 let e = _mm512_set_ph(
20859 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
20860 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20861 );
20862 assert_eq_m512h(r, e);
20863 }
20864
20865 #[simd_test(enable = "avx512fp16")]
20866 unsafe fn test_mm512_maskz_fnmadd_ph() {
20867 let a = _mm512_set1_ph(1.0);
20868 let b = _mm512_set1_ph(2.0);
20869 let c = _mm512_set1_ph(3.0);
20870 let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
20871 let e = _mm512_set_ph(
20872 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
20873 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20874 );
20875 assert_eq_m512h(r, e);
20876 }
20877
20878 #[simd_test(enable = "avx512fp16")]
20879 unsafe fn test_mm512_fnmadd_round_ph() {
20880 let a = _mm512_set1_ph(1.0);
20881 let b = _mm512_set1_ph(2.0);
20882 let c = _mm512_set1_ph(3.0);
20883 let r =
20884 _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20885 let e = _mm512_set1_ph(1.0);
20886 assert_eq_m512h(r, e);
20887 }
20888
20889 #[simd_test(enable = "avx512fp16")]
20890 unsafe fn test_mm512_mask_fnmadd_round_ph() {
20891 let a = _mm512_set1_ph(1.0);
20892 let b = _mm512_set1_ph(2.0);
20893 let c = _mm512_set1_ph(3.0);
20894 let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20895 a,
20896 0b01010101010101010101010101010101,
20897 b,
20898 c,
20899 );
20900 let e = _mm512_set_ph(
20901 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20902 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20903 );
20904 assert_eq_m512h(r, e);
20905 }
20906
20907 #[simd_test(enable = "avx512fp16")]
20908 unsafe fn test_mm512_mask3_fnmadd_round_ph() {
20909 let a = _mm512_set1_ph(1.0);
20910 let b = _mm512_set1_ph(2.0);
20911 let c = _mm512_set1_ph(3.0);
20912 let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20913 a,
20914 b,
20915 c,
20916 0b01010101010101010101010101010101,
20917 );
20918 let e = _mm512_set_ph(
20919 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
20920 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20921 );
20922 assert_eq_m512h(r, e);
20923 }
20924
20925 #[simd_test(enable = "avx512fp16")]
20926 unsafe fn test_mm512_maskz_fnmadd_round_ph() {
20927 let a = _mm512_set1_ph(1.0);
20928 let b = _mm512_set1_ph(2.0);
20929 let c = _mm512_set1_ph(3.0);
20930 let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20931 0b01010101010101010101010101010101,
20932 a,
20933 b,
20934 c,
20935 );
20936 let e = _mm512_set_ph(
20937 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
20938 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20939 );
20940 assert_eq_m512h(r, e);
20941 }
20942
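// The scalar result -(1.0 * 2.0) + 3.0 == 1.0 coincides with a[0], so _mm_mask_fnmadd_sh
// expects 1.0 in lane 0 for either mask value.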
20943 #[simd_test(enable = "avx512fp16")]
20944 unsafe fn test_mm_fnmadd_sh() {
20945 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20946 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20947 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20948 let r = _mm_fnmadd_sh(a, b, c);
20949 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20950 assert_eq_m128h(r, e);
20951 }
20952
20953 #[simd_test(enable = "avx512fp16")]
20954 unsafe fn test_mm_mask_fnmadd_sh() {
20955 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20956 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20957 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20958 let r = _mm_mask_fnmadd_sh(a, 0, b, c);
20959 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20960 assert_eq_m128h(r, e);
20961 let r = _mm_mask_fnmadd_sh(a, 1, b, c);
20962 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20963 assert_eq_m128h(r, e);
20964 }
20965
20966 #[simd_test(enable = "avx512fp16")]
20967 unsafe fn test_mm_mask3_fnmadd_sh() {
20968 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20969 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20970 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20971 let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
20972 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20973 assert_eq_m128h(r, e);
20974 let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
20975 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
20976 assert_eq_m128h(r, e);
20977 }
20978
20979 #[simd_test(enable = "avx512fp16")]
20980 unsafe fn test_mm_maskz_fnmadd_sh() {
20981 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20982 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20983 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20984 let r = _mm_maskz_fnmadd_sh(0, a, b, c);
20985 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20986 assert_eq_m128h(r, e);
20987 let r = _mm_maskz_fnmadd_sh(1, a, b, c);
20988 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20989 assert_eq_m128h(r, e);
20990 }
20991
20992 #[simd_test(enable = "avx512fp16")]
20993 unsafe fn test_mm_fnmadd_round_sh() {
20994 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20995 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20996 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20997 let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20998 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20999 assert_eq_m128h(r, e);
21000 }
21001
21002 #[simd_test(enable = "avx512fp16")]
21003 unsafe fn test_mm_mask_fnmadd_round_sh() {
21004 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21005 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21006 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21007 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21008 a, 0, b, c,
21009 );
21010 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21011 assert_eq_m128h(r, e);
21012 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21013 a, 1, b, c,
21014 );
21015 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21016 assert_eq_m128h(r, e);
21017 }
21018
21019 #[simd_test(enable = "avx512fp16")]
21020 unsafe fn test_mm_mask3_fnmadd_round_sh() {
21021 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21022 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21023 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21024 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21025 a, b, c, 0,
21026 );
21027 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21028 assert_eq_m128h(r, e);
21029 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21030 a, b, c, 1,
21031 );
21032 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
21033 assert_eq_m128h(r, e);
21034 }
21035
21036 #[simd_test(enable = "avx512fp16")]
21037 unsafe fn test_mm_maskz_fnmadd_round_sh() {
21038 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21039 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21040 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21041 let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21042 0, a, b, c,
21043 );
21044 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21045 assert_eq_m128h(r, e);
21046 let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21047 1, a, b, c,
21048 );
21049 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21050 assert_eq_m128h(r, e);
21051 }
21052
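// fnmsub computes -(a * b) - c per lane: -(1.0 * 2.0) - 3.0 == -5.0.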
21053 #[simd_test(enable = "avx512fp16,avx512vl")]
21054 unsafe fn test_mm_fnmsub_ph() {
21055 let a = _mm_set1_ph(1.0);
21056 let b = _mm_set1_ph(2.0);
21057 let c = _mm_set1_ph(3.0);
21058 let r = _mm_fnmsub_ph(a, b, c);
21059 let e = _mm_set1_ph(-5.0);
21060 assert_eq_m128h(r, e);
21061 }
21062
21063 #[simd_test(enable = "avx512fp16,avx512vl")]
21064 unsafe fn test_mm_mask_fnmsub_ph() {
21065 let a = _mm_set1_ph(1.0);
21066 let b = _mm_set1_ph(2.0);
21067 let c = _mm_set1_ph(3.0);
21068 let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
21069 let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
21070 assert_eq_m128h(r, e);
21071 }
21072
21073 #[simd_test(enable = "avx512fp16,avx512vl")]
21074 unsafe fn test_mm_mask3_fnmsub_ph() {
21075 let a = _mm_set1_ph(1.0);
21076 let b = _mm_set1_ph(2.0);
21077 let c = _mm_set1_ph(3.0);
21078 let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
21079 let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
21080 assert_eq_m128h(r, e);
21081 }
21082
21083 #[simd_test(enable = "avx512fp16,avx512vl")]
21084 unsafe fn test_mm_maskz_fnmsub_ph() {
21085 let a = _mm_set1_ph(1.0);
21086 let b = _mm_set1_ph(2.0);
21087 let c = _mm_set1_ph(3.0);
21088 let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
21089 let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
21090 assert_eq_m128h(r, e);
21091 }
21092
21093 #[simd_test(enable = "avx512fp16,avx512vl")]
21094 unsafe fn test_mm256_fnmsub_ph() {
21095 let a = _mm256_set1_ph(1.0);
21096 let b = _mm256_set1_ph(2.0);
21097 let c = _mm256_set1_ph(3.0);
21098 let r = _mm256_fnmsub_ph(a, b, c);
21099 let e = _mm256_set1_ph(-5.0);
21100 assert_eq_m256h(r, e);
21101 }
21102
21103 #[simd_test(enable = "avx512fp16,avx512vl")]
21104 unsafe fn test_mm256_mask_fnmsub_ph() {
21105 let a = _mm256_set1_ph(1.0);
21106 let b = _mm256_set1_ph(2.0);
21107 let c = _mm256_set1_ph(3.0);
21108 let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
21109 let e = _mm256_set_ph(
21110 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21111 );
21112 assert_eq_m256h(r, e);
21113 }
21114
21115 #[simd_test(enable = "avx512fp16,avx512vl")]
21116 unsafe fn test_mm256_mask3_fnmsub_ph() {
21117 let a = _mm256_set1_ph(1.0);
21118 let b = _mm256_set1_ph(2.0);
21119 let c = _mm256_set1_ph(3.0);
21120 let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
21121 let e = _mm256_set_ph(
21122 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21123 );
21124 assert_eq_m256h(r, e);
21125 }
21126
21127 #[simd_test(enable = "avx512fp16,avx512vl")]
21128 unsafe fn test_mm256_maskz_fnmsub_ph() {
21129 let a = _mm256_set1_ph(1.0);
21130 let b = _mm256_set1_ph(2.0);
21131 let c = _mm256_set1_ph(3.0);
21132 let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
21133 let e = _mm256_set_ph(
21134 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21135 );
21136 assert_eq_m256h(r, e);
21137 }
21138
21139 #[simd_test(enable = "avx512fp16")]
21140 unsafe fn test_mm512_fnmsub_ph() {
21141 let a = _mm512_set1_ph(1.0);
21142 let b = _mm512_set1_ph(2.0);
21143 let c = _mm512_set1_ph(3.0);
21144 let r = _mm512_fnmsub_ph(a, b, c);
21145 let e = _mm512_set1_ph(-5.0);
21146 assert_eq_m512h(r, e);
21147 }
21148
21149 #[simd_test(enable = "avx512fp16")]
21150 unsafe fn test_mm512_mask_fnmsub_ph() {
21151 let a = _mm512_set1_ph(1.0);
21152 let b = _mm512_set1_ph(2.0);
21153 let c = _mm512_set1_ph(3.0);
21154 let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
21155 let e = _mm512_set_ph(
21156 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21157 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21158 );
21159 assert_eq_m512h(r, e);
21160 }
21161
21162 #[simd_test(enable = "avx512fp16")]
21163 unsafe fn test_mm512_mask3_fnmsub_ph() {
21164 let a = _mm512_set1_ph(1.0);
21165 let b = _mm512_set1_ph(2.0);
21166 let c = _mm512_set1_ph(3.0);
21167 let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
21168 let e = _mm512_set_ph(
21169 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21170 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21171 );
21172 assert_eq_m512h(r, e);
21173 }
21174
21175 #[simd_test(enable = "avx512fp16")]
21176 unsafe fn test_mm512_maskz_fnmsub_ph() {
21177 let a = _mm512_set1_ph(1.0);
21178 let b = _mm512_set1_ph(2.0);
21179 let c = _mm512_set1_ph(3.0);
21180 let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
21181 let e = _mm512_set_ph(
21182 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21183 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21184 );
21185 assert_eq_m512h(r, e);
21186 }
21187
21188 #[simd_test(enable = "avx512fp16")]
21189 unsafe fn test_mm512_fnmsub_round_ph() {
21190 let a = _mm512_set1_ph(1.0);
21191 let b = _mm512_set1_ph(2.0);
21192 let c = _mm512_set1_ph(3.0);
21193 let r =
21194 _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21195 let e = _mm512_set1_ph(-5.0);
21196 assert_eq_m512h(r, e);
21197 }
21198
21199 #[simd_test(enable = "avx512fp16")]
21200 unsafe fn test_mm512_mask_fnmsub_round_ph() {
21201 let a = _mm512_set1_ph(1.0);
21202 let b = _mm512_set1_ph(2.0);
21203 let c = _mm512_set1_ph(3.0);
21204 let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21205 a,
21206 0b01010101010101010101010101010101,
21207 b,
21208 c,
21209 );
21210 let e = _mm512_set_ph(
21211 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21212 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21213 );
21214 assert_eq_m512h(r, e);
21215 }
21216
21217 #[simd_test(enable = "avx512fp16")]
21218 unsafe fn test_mm512_mask3_fnmsub_round_ph() {
21219 let a = _mm512_set1_ph(1.0);
21220 let b = _mm512_set1_ph(2.0);
21221 let c = _mm512_set1_ph(3.0);
21222 let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21223 a,
21224 b,
21225 c,
21226 0b01010101010101010101010101010101,
21227 );
21228 let e = _mm512_set_ph(
21229 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21230 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21231 );
21232 assert_eq_m512h(r, e);
21233 }
21234
21235 #[simd_test(enable = "avx512fp16")]
21236 unsafe fn test_mm512_maskz_fnmsub_round_ph() {
21237 let a = _mm512_set1_ph(1.0);
21238 let b = _mm512_set1_ph(2.0);
21239 let c = _mm512_set1_ph(3.0);
21240 let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21241 0b01010101010101010101010101010101,
21242 a,
21243 b,
21244 c,
21245 );
21246 let e = _mm512_set_ph(
21247 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21248 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21249 );
21250 assert_eq_m512h(r, e);
21251 }
21252
21253 #[simd_test(enable = "avx512fp16")]
21254 unsafe fn test_mm_fnmsub_sh() {
21255 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21256 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21257 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21258 let r = _mm_fnmsub_sh(a, b, c);
21259 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21260 assert_eq_m128h(r, e);
21261 }
21262
21263 #[simd_test(enable = "avx512fp16")]
21264 unsafe fn test_mm_mask_fnmsub_sh() {
21265 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21266 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21267 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21268 let r = _mm_mask_fnmsub_sh(a, 0, b, c);
21269 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21270 assert_eq_m128h(r, e);
21271 let r = _mm_mask_fnmsub_sh(a, 1, b, c);
21272 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21273 assert_eq_m128h(r, e);
21274 }
21275
21276 #[simd_test(enable = "avx512fp16")]
21277 unsafe fn test_mm_mask3_fnmsub_sh() {
21278 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21279 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21280 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21281 let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
21282 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21283 assert_eq_m128h(r, e);
21284 let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
21285 let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
21286 assert_eq_m128h(r, e);
21287 }
21288
21289 #[simd_test(enable = "avx512fp16")]
21290 unsafe fn test_mm_maskz_fnmsub_sh() {
21291 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21292 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21293 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21294 let r = _mm_maskz_fnmsub_sh(0, a, b, c);
21295 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21296 assert_eq_m128h(r, e);
21297 let r = _mm_maskz_fnmsub_sh(1, a, b, c);
21298 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21299 assert_eq_m128h(r, e);
21300 }
21301
21302 #[simd_test(enable = "avx512fp16")]
21303 unsafe fn test_mm_fnmsub_round_sh() {
21304 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21305 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21306 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21307 let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21308 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21309 assert_eq_m128h(r, e);
21310 }
21311
21312 #[simd_test(enable = "avx512fp16")]
21313 unsafe fn test_mm_mask_fnmsub_round_sh() {
21314 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21315 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21316 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21317 let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21318 a, 0, b, c,
21319 );
21320 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21321 assert_eq_m128h(r, e);
21322 let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21323 a, 1, b, c,
21324 );
21325 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21326 assert_eq_m128h(r, e);
21327 }
21328
21329 #[simd_test(enable = "avx512fp16")]
21330 unsafe fn test_mm_mask3_fnmsub_round_sh() {
21331 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21332 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21333 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21334 let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21335 a, b, c, 0,
21336 );
21337 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21338 assert_eq_m128h(r, e);
21339 let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21340 a, b, c, 1,
21341 );
21342 let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
21343 assert_eq_m128h(r, e);
21344 }
21345
21346 #[simd_test(enable = "avx512fp16")]
21347 unsafe fn test_mm_maskz_fnmsub_round_sh() {
21348 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21349 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21350 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21351 let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21352 0, a, b, c,
21353 );
21354 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21355 assert_eq_m128h(r, e);
21356 let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21357 1, a, b, c,
21358 );
21359 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21360 assert_eq_m128h(r, e);
21361 }
21362
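// fmaddsub alternates per lane: even-indexed lanes compute a * b - c (-1.0), odd-indexed lanes
// a * b + c (5.0). _mm_set_ph lists lane 7 first, so the trailing -1.0 in each expected vector
// is lane 0.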
21363 #[simd_test(enable = "avx512fp16,avx512vl")]
21364 unsafe fn test_mm_fmaddsub_ph() {
21365 let a = _mm_set1_ph(1.0);
21366 let b = _mm_set1_ph(2.0);
21367 let c = _mm_set1_ph(3.0);
21368 let r = _mm_fmaddsub_ph(a, b, c);
21369 let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
21370 assert_eq_m128h(r, e);
21371 }
21372
21373 #[simd_test(enable = "avx512fp16,avx512vl")]
21374 unsafe fn test_mm_mask_fmaddsub_ph() {
21375 let a = _mm_set1_ph(1.0);
21376 let b = _mm_set1_ph(2.0);
21377 let c = _mm_set1_ph(3.0);
21378 let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
21379 let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
21380 assert_eq_m128h(r, e);
21381 }
21382
21383 #[simd_test(enable = "avx512fp16,avx512vl")]
21384 unsafe fn test_mm_mask3_fmaddsub_ph() {
21385 let a = _mm_set1_ph(1.0);
21386 let b = _mm_set1_ph(2.0);
21387 let c = _mm_set1_ph(3.0);
21388 let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
21389 let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
21390 assert_eq_m128h(r, e);
21391 }
21392
21393 #[simd_test(enable = "avx512fp16,avx512vl")]
21394 unsafe fn test_mm_maskz_fmaddsub_ph() {
21395 let a = _mm_set1_ph(1.0);
21396 let b = _mm_set1_ph(2.0);
21397 let c = _mm_set1_ph(3.0);
21398 let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
21399 let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
21400 assert_eq_m128h(r, e);
21401 }
21402
21403 #[simd_test(enable = "avx512fp16,avx512vl")]
21404 unsafe fn test_mm256_fmaddsub_ph() {
21405 let a = _mm256_set1_ph(1.0);
21406 let b = _mm256_set1_ph(2.0);
21407 let c = _mm256_set1_ph(3.0);
21408 let r = _mm256_fmaddsub_ph(a, b, c);
21409 let e = _mm256_set_ph(
21410 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21411 );
21412 assert_eq_m256h(r, e);
21413 }
21414
21415 #[simd_test(enable = "avx512fp16,avx512vl")]
21416 unsafe fn test_mm256_mask_fmaddsub_ph() {
21417 let a = _mm256_set1_ph(1.0);
21418 let b = _mm256_set1_ph(2.0);
21419 let c = _mm256_set1_ph(3.0);
21420 let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
21421 let e = _mm256_set_ph(
21422 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21423 );
21424 assert_eq_m256h(r, e);
21425 }
21426
21427 #[simd_test(enable = "avx512fp16,avx512vl")]
21428 unsafe fn test_mm256_mask3_fmaddsub_ph() {
21429 let a = _mm256_set1_ph(1.0);
21430 let b = _mm256_set1_ph(2.0);
21431 let c = _mm256_set1_ph(3.0);
21432 let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
21433 let e = _mm256_set_ph(
21434 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21435 );
21436 assert_eq_m256h(r, e);
21437 }
21438
21439 #[simd_test(enable = "avx512fp16,avx512vl")]
21440 unsafe fn test_mm256_maskz_fmaddsub_ph() {
21441 let a = _mm256_set1_ph(1.0);
21442 let b = _mm256_set1_ph(2.0);
21443 let c = _mm256_set1_ph(3.0);
21444 let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
21445 let e = _mm256_set_ph(
21446 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21447 );
21448 assert_eq_m256h(r, e);
21449 }
21450
21451 #[simd_test(enable = "avx512fp16")]
21452 unsafe fn test_mm512_fmaddsub_ph() {
21453 let a = _mm512_set1_ph(1.0);
21454 let b = _mm512_set1_ph(2.0);
21455 let c = _mm512_set1_ph(3.0);
21456 let r = _mm512_fmaddsub_ph(a, b, c);
21457 let e = _mm512_set_ph(
21458 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21459 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21460 );
21461 assert_eq_m512h(r, e);
21462 }
21463
21464 #[simd_test(enable = "avx512fp16")]
21465 unsafe fn test_mm512_mask_fmaddsub_ph() {
21466 let a = _mm512_set1_ph(1.0);
21467 let b = _mm512_set1_ph(2.0);
21468 let c = _mm512_set1_ph(3.0);
21469 let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
21470 let e = _mm512_set_ph(
21471 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21472 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21473 );
21474 assert_eq_m512h(r, e);
21475 }
21476
21477 #[simd_test(enable = "avx512fp16")]
21478 unsafe fn test_mm512_mask3_fmaddsub_ph() {
21479 let a = _mm512_set1_ph(1.0);
21480 let b = _mm512_set1_ph(2.0);
21481 let c = _mm512_set1_ph(3.0);
21482 let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
21483 let e = _mm512_set_ph(
21484 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21485 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21486 );
21487 assert_eq_m512h(r, e);
21488 }
21489
21490 #[simd_test(enable = "avx512fp16")]
21491 unsafe fn test_mm512_maskz_fmaddsub_ph() {
21492 let a = _mm512_set1_ph(1.0);
21493 let b = _mm512_set1_ph(2.0);
21494 let c = _mm512_set1_ph(3.0);
21495 let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
21496 let e = _mm512_set_ph(
21497 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21498 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21499 );
21500 assert_eq_m512h(r, e);
21501 }
21502
21503 #[simd_test(enable = "avx512fp16")]
21504 unsafe fn test_mm512_fmaddsub_round_ph() {
21505 let a = _mm512_set1_ph(1.0);
21506 let b = _mm512_set1_ph(2.0);
21507 let c = _mm512_set1_ph(3.0);
21508 let r =
21509 _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21510 let e = _mm512_set_ph(
21511 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21512 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21513 );
21514 assert_eq_m512h(r, e);
21515 }
21516
21517 #[simd_test(enable = "avx512fp16")]
21518 unsafe fn test_mm512_mask_fmaddsub_round_ph() {
21519 let a = _mm512_set1_ph(1.0);
21520 let b = _mm512_set1_ph(2.0);
21521 let c = _mm512_set1_ph(3.0);
21522 let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21523 a,
21524 0b00110011001100110011001100110011,
21525 b,
21526 c,
21527 );
21528 let e = _mm512_set_ph(
21529 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21530 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21531 );
21532 assert_eq_m512h(r, e);
21533 }
21534
21535 #[simd_test(enable = "avx512fp16")]
21536 unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
21537 let a = _mm512_set1_ph(1.0);
21538 let b = _mm512_set1_ph(2.0);
21539 let c = _mm512_set1_ph(3.0);
21540 let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21541 a,
21542 b,
21543 c,
21544 0b00110011001100110011001100110011,
21545 );
21546 let e = _mm512_set_ph(
21547 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21548 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21549 );
21550 assert_eq_m512h(r, e);
21551 }
21552
21553 #[simd_test(enable = "avx512fp16")]
21554 unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
21555 let a = _mm512_set1_ph(1.0);
21556 let b = _mm512_set1_ph(2.0);
21557 let c = _mm512_set1_ph(3.0);
21558 let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21559 0b00110011001100110011001100110011,
21560 a,
21561 b,
21562 c,
21563 );
21564 let e = _mm512_set_ph(
21565 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21566 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21567 );
21568 assert_eq_m512h(r, e);
21569 }
21570
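// fmsubadd is the opposite alternation: even-indexed lanes compute a * b + c (5.0),
// odd-indexed lanes a * b - c (-1.0).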
21571 #[simd_test(enable = "avx512fp16,avx512vl")]
21572 unsafe fn test_mm_fmsubadd_ph() {
21573 let a = _mm_set1_ph(1.0);
21574 let b = _mm_set1_ph(2.0);
21575 let c = _mm_set1_ph(3.0);
21576 let r = _mm_fmsubadd_ph(a, b, c);
21577 let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
21578 assert_eq_m128h(r, e);
21579 }
21580
21581 #[simd_test(enable = "avx512fp16,avx512vl")]
21582 unsafe fn test_mm_mask_fmsubadd_ph() {
21583 let a = _mm_set1_ph(1.0);
21584 let b = _mm_set1_ph(2.0);
21585 let c = _mm_set1_ph(3.0);
21586 let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
21587 let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
21588 assert_eq_m128h(r, e);
21589 }
21590
21591 #[simd_test(enable = "avx512fp16,avx512vl")]
21592 unsafe fn test_mm_mask3_fmsubadd_ph() {
21593 let a = _mm_set1_ph(1.0);
21594 let b = _mm_set1_ph(2.0);
21595 let c = _mm_set1_ph(3.0);
21596 let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
21597 let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
21598 assert_eq_m128h(r, e);
21599 }
21600
21601 #[simd_test(enable = "avx512fp16,avx512vl")]
21602 unsafe fn test_mm_maskz_fmsubadd_ph() {
21603 let a = _mm_set1_ph(1.0);
21604 let b = _mm_set1_ph(2.0);
21605 let c = _mm_set1_ph(3.0);
21606 let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
21607 let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
21608 assert_eq_m128h(r, e);
21609 }
21610
21611 #[simd_test(enable = "avx512fp16,avx512vl")]
21612 unsafe fn test_mm256_fmsubadd_ph() {
21613 let a = _mm256_set1_ph(1.0);
21614 let b = _mm256_set1_ph(2.0);
21615 let c = _mm256_set1_ph(3.0);
21616 let r = _mm256_fmsubadd_ph(a, b, c);
21617 let e = _mm256_set_ph(
21618 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21619 );
21620 assert_eq_m256h(r, e);
21621 }
21622
21623 #[simd_test(enable = "avx512fp16,avx512vl")]
21624 unsafe fn test_mm256_mask_fmsubadd_ph() {
21625 let a = _mm256_set1_ph(1.0);
21626 let b = _mm256_set1_ph(2.0);
21627 let c = _mm256_set1_ph(3.0);
21628 let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
21629 let e = _mm256_set_ph(
21630 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21631 );
21632 assert_eq_m256h(r, e);
21633 }
21634
21635 #[simd_test(enable = "avx512fp16,avx512vl")]
21636 unsafe fn test_mm256_mask3_fmsubadd_ph() {
21637 let a = _mm256_set1_ph(1.0);
21638 let b = _mm256_set1_ph(2.0);
21639 let c = _mm256_set1_ph(3.0);
21640 let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
21641 let e = _mm256_set_ph(
21642 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21643 );
21644 assert_eq_m256h(r, e);
21645 }
21646
21647 #[simd_test(enable = "avx512fp16,avx512vl")]
21648 unsafe fn test_mm256_maskz_fmsubadd_ph() {
21649 let a = _mm256_set1_ph(1.0);
21650 let b = _mm256_set1_ph(2.0);
21651 let c = _mm256_set1_ph(3.0);
21652 let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
21653 let e = _mm256_set_ph(
21654 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21655 );
21656 assert_eq_m256h(r, e);
21657 }
21658
21659 #[simd_test(enable = "avx512fp16")]
21660 unsafe fn test_mm512_fmsubadd_ph() {
21661 let a = _mm512_set1_ph(1.0);
21662 let b = _mm512_set1_ph(2.0);
21663 let c = _mm512_set1_ph(3.0);
21664 let r = _mm512_fmsubadd_ph(a, b, c);
21665 let e = _mm512_set_ph(
21666 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21667 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21668 );
21669 assert_eq_m512h(r, e);
21670 }
21671
21672 #[simd_test(enable = "avx512fp16")]
21673 unsafe fn test_mm512_mask_fmsubadd_ph() {
21674 let a = _mm512_set1_ph(1.0);
21675 let b = _mm512_set1_ph(2.0);
21676 let c = _mm512_set1_ph(3.0);
21677 let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
21678 let e = _mm512_set_ph(
21679 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21680 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21681 );
21682 assert_eq_m512h(r, e);
21683 }
21684
21685 #[simd_test(enable = "avx512fp16")]
21686 unsafe fn test_mm512_mask3_fmsubadd_ph() {
21687 let a = _mm512_set1_ph(1.0);
21688 let b = _mm512_set1_ph(2.0);
21689 let c = _mm512_set1_ph(3.0);
21690 let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
21691 let e = _mm512_set_ph(
21692 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21693 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21694 );
21695 assert_eq_m512h(r, e);
21696 }
21697
21698 #[simd_test(enable = "avx512fp16")]
21699 unsafe fn test_mm512_maskz_fmsubadd_ph() {
21700 let a = _mm512_set1_ph(1.0);
21701 let b = _mm512_set1_ph(2.0);
21702 let c = _mm512_set1_ph(3.0);
21703 let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
21704 let e = _mm512_set_ph(
21705 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21706 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21707 );
21708 assert_eq_m512h(r, e);
21709 }
21710
21711 #[simd_test(enable = "avx512fp16")]
21712 unsafe fn test_mm512_fmsubadd_round_ph() {
21713 let a = _mm512_set1_ph(1.0);
21714 let b = _mm512_set1_ph(2.0);
21715 let c = _mm512_set1_ph(3.0);
21716 let r =
21717 _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21718 let e = _mm512_set_ph(
21719 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21720 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21721 );
21722 assert_eq_m512h(r, e);
21723 }
21724
21725 #[simd_test(enable = "avx512fp16")]
21726 unsafe fn test_mm512_mask_fmsubadd_round_ph() {
21727 let a = _mm512_set1_ph(1.0);
21728 let b = _mm512_set1_ph(2.0);
21729 let c = _mm512_set1_ph(3.0);
21730 let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21731 a,
21732 0b00110011001100110011001100110011,
21733 b,
21734 c,
21735 );
21736 let e = _mm512_set_ph(
21737 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21738 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21739 );
21740 assert_eq_m512h(r, e);
21741 }
21742
21743 #[simd_test(enable = "avx512fp16")]
21744 unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
21745 let a = _mm512_set1_ph(1.0);
21746 let b = _mm512_set1_ph(2.0);
21747 let c = _mm512_set1_ph(3.0);
21748 let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21749 a,
21750 b,
21751 c,
21752 0b00110011001100110011001100110011,
21753 );
21754 let e = _mm512_set_ph(
21755 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21756 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21757 );
21758 assert_eq_m512h(r, e);
21759 }
21760
21761 #[simd_test(enable = "avx512fp16")]
21762 unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
21763 let a = _mm512_set1_ph(1.0);
21764 let b = _mm512_set1_ph(2.0);
21765 let c = _mm512_set1_ph(3.0);
21766 let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21767 0b00110011001100110011001100110011,
21768 a,
21769 b,
21770 c,
21771 );
21772 let e = _mm512_set_ph(
21773 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21774 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21775 );
21776 assert_eq_m512h(r, e);
21777 }
21778
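    // rcp computes an approximate reciprocal. The inputs are chosen so the result
    // (1/2.0 = 0.5) is reproduced exactly by the approximation, which keeps the
    // exact-equality assertions below stable.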
21779 #[simd_test(enable = "avx512fp16,avx512vl")]
21780 unsafe fn test_mm_rcp_ph() {
21781 let a = _mm_set1_ph(2.0);
21782 let r = _mm_rcp_ph(a);
21783 let e = _mm_set1_ph(0.5);
21784 assert_eq_m128h(r, e);
21785 }
21786
21787 #[simd_test(enable = "avx512fp16,avx512vl")]
21788 unsafe fn test_mm_mask_rcp_ph() {
21789 let a = _mm_set1_ph(2.0);
21790 let src = _mm_set1_ph(1.0);
21791 let r = _mm_mask_rcp_ph(src, 0b01010101, a);
21792 let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
21793 assert_eq_m128h(r, e);
21794 }
21795
21796 #[simd_test(enable = "avx512fp16,avx512vl")]
21797 unsafe fn test_mm_maskz_rcp_ph() {
21798 let a = _mm_set1_ph(2.0);
21799 let r = _mm_maskz_rcp_ph(0b01010101, a);
21800 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
21801 assert_eq_m128h(r, e);
21802 }
21803
21804 #[simd_test(enable = "avx512fp16,avx512vl")]
21805 unsafe fn test_mm256_rcp_ph() {
21806 let a = _mm256_set1_ph(2.0);
21807 let r = _mm256_rcp_ph(a);
21808 let e = _mm256_set1_ph(0.5);
21809 assert_eq_m256h(r, e);
21810 }
21811
21812 #[simd_test(enable = "avx512fp16,avx512vl")]
21813 unsafe fn test_mm256_mask_rcp_ph() {
21814 let a = _mm256_set1_ph(2.0);
21815 let src = _mm256_set1_ph(1.0);
21816 let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
21817 let e = _mm256_set_ph(
21818 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21819 );
21820 assert_eq_m256h(r, e);
21821 }
21822
21823 #[simd_test(enable = "avx512fp16,avx512vl")]
21824 unsafe fn test_mm256_maskz_rcp_ph() {
21825 let a = _mm256_set1_ph(2.0);
21826 let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
21827 let e = _mm256_set_ph(
21828 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21829 );
21830 assert_eq_m256h(r, e);
21831 }
21832
21833 #[simd_test(enable = "avx512fp16")]
21834 unsafe fn test_mm512_rcp_ph() {
21835 let a = _mm512_set1_ph(2.0);
21836 let r = _mm512_rcp_ph(a);
21837 let e = _mm512_set1_ph(0.5);
21838 assert_eq_m512h(r, e);
21839 }
21840
21841 #[simd_test(enable = "avx512fp16")]
21842 unsafe fn test_mm512_mask_rcp_ph() {
21843 let a = _mm512_set1_ph(2.0);
21844 let src = _mm512_set1_ph(1.0);
21845 let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
21846 let e = _mm512_set_ph(
21847 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
21848 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21849 );
21850 assert_eq_m512h(r, e);
21851 }
21852
21853 #[simd_test(enable = "avx512fp16")]
21854 unsafe fn test_mm512_maskz_rcp_ph() {
21855 let a = _mm512_set1_ph(2.0);
21856 let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
21857 let e = _mm512_set_ph(
21858 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
21859 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21860 );
21861 assert_eq_m512h(r, e);
21862 }
21863
21864 #[simd_test(enable = "avx512fp16")]
21865 unsafe fn test_mm_rcp_sh() {
21866 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21867 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21868 let r = _mm_rcp_sh(a, b);
21869 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21870 assert_eq_m128h(r, e);
21871 }
21872
21873 #[simd_test(enable = "avx512fp16")]
21874 unsafe fn test_mm_mask_rcp_sh() {
21875 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21876 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21877 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
21878 let r = _mm_mask_rcp_sh(src, 0, a, b);
21879 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21880 assert_eq_m128h(r, e);
21881 let r = _mm_mask_rcp_sh(src, 1, a, b);
21882 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21883 assert_eq_m128h(r, e);
21884 }
21885
21886 #[simd_test(enable = "avx512fp16")]
21887 unsafe fn test_mm_maskz_rcp_sh() {
21888 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21889 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21890 let r = _mm_maskz_rcp_sh(0, a, b);
21891 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21892 assert_eq_m128h(r, e);
21893 let r = _mm_maskz_rcp_sh(1, a, b);
21894 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21895 assert_eq_m128h(r, e);
21896 }
21897
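    // rsqrt computes an approximate reciprocal square root; 1/sqrt(4.0) = 0.5.
    // The _sh variants operate only on the lowest lane of b and copy lanes 1..=7 from a.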
21898 #[simd_test(enable = "avx512fp16,avx512vl")]
21899 unsafe fn test_mm_rsqrt_ph() {
21900 let a = _mm_set1_ph(4.0);
21901 let r = _mm_rsqrt_ph(a);
21902 let e = _mm_set1_ph(0.5);
21903 assert_eq_m128h(r, e);
21904 }
21905
21906 #[simd_test(enable = "avx512fp16,avx512vl")]
21907 unsafe fn test_mm_mask_rsqrt_ph() {
21908 let a = _mm_set1_ph(4.0);
21909 let src = _mm_set1_ph(1.0);
21910 let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
21911 let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
21912 assert_eq_m128h(r, e);
21913 }
21914
21915 #[simd_test(enable = "avx512fp16,avx512vl")]
21916 unsafe fn test_mm_maskz_rsqrt_ph() {
21917 let a = _mm_set1_ph(4.0);
21918 let r = _mm_maskz_rsqrt_ph(0b01010101, a);
21919 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
21920 assert_eq_m128h(r, e);
21921 }
21922
21923 #[simd_test(enable = "avx512fp16,avx512vl")]
21924 unsafe fn test_mm256_rsqrt_ph() {
21925 let a = _mm256_set1_ph(4.0);
21926 let r = _mm256_rsqrt_ph(a);
21927 let e = _mm256_set1_ph(0.5);
21928 assert_eq_m256h(r, e);
21929 }
21930
21931 #[simd_test(enable = "avx512fp16,avx512vl")]
21932 unsafe fn test_mm256_mask_rsqrt_ph() {
21933 let a = _mm256_set1_ph(4.0);
21934 let src = _mm256_set1_ph(1.0);
21935 let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
21936 let e = _mm256_set_ph(
21937 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21938 );
21939 assert_eq_m256h(r, e);
21940 }
21941
21942 #[simd_test(enable = "avx512fp16,avx512vl")]
21943 unsafe fn test_mm256_maskz_rsqrt_ph() {
21944 let a = _mm256_set1_ph(4.0);
21945 let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
21946 let e = _mm256_set_ph(
21947 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21948 );
21949 assert_eq_m256h(r, e);
21950 }
21951
21952 #[simd_test(enable = "avx512fp16")]
21953 unsafe fn test_mm512_rsqrt_ph() {
21954 let a = _mm512_set1_ph(4.0);
21955 let r = _mm512_rsqrt_ph(a);
21956 let e = _mm512_set1_ph(0.5);
21957 assert_eq_m512h(r, e);
21958 }
21959
21960 #[simd_test(enable = "avx512fp16")]
21961 unsafe fn test_mm512_mask_rsqrt_ph() {
21962 let a = _mm512_set1_ph(4.0);
21963 let src = _mm512_set1_ph(1.0);
21964 let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
21965 let e = _mm512_set_ph(
21966 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
21967 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21968 );
21969 assert_eq_m512h(r, e);
21970 }
21971
21972 #[simd_test(enable = "avx512fp16")]
21973 unsafe fn test_mm512_maskz_rsqrt_ph() {
21974 let a = _mm512_set1_ph(4.0);
21975 let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
21976 let e = _mm512_set_ph(
21977 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
21978 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21979 );
21980 assert_eq_m512h(r, e);
21981 }
21982
21983 #[simd_test(enable = "avx512fp16")]
21984 unsafe fn test_mm_rsqrt_sh() {
21985 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21986 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
21987 let r = _mm_rsqrt_sh(a, b);
21988 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21989 assert_eq_m128h(r, e);
21990 }
21991
21992 #[simd_test(enable = "avx512fp16")]
21993 unsafe fn test_mm_mask_rsqrt_sh() {
21994 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21995 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
21996 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
21997 let r = _mm_mask_rsqrt_sh(src, 0, a, b);
21998 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21999 assert_eq_m128h(r, e);
22000 let r = _mm_mask_rsqrt_sh(src, 1, a, b);
22001 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22002 assert_eq_m128h(r, e);
22003 }
22004
22005 #[simd_test(enable = "avx512fp16")]
22006 unsafe fn test_mm_maskz_rsqrt_sh() {
22007 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22008 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22009 let r = _mm_maskz_rsqrt_sh(0, a, b);
22010 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22011 assert_eq_m128h(r, e);
22012 let r = _mm_maskz_rsqrt_sh(1, a, b);
22013 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22014 assert_eq_m128h(r, e);
22015 }
22016
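    // Unlike rcp/rsqrt, sqrt is an exact (correctly rounded) operation: sqrt(4.0) = 2.0.
    // The *_round_* variants pass _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, i.e.
    // round-to-nearest-even with floating-point exceptions suppressed (SAE).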
22017 #[simd_test(enable = "avx512fp16,avx512vl")]
22018 unsafe fn test_mm_sqrt_ph() {
22019 let a = _mm_set1_ph(4.0);
22020 let r = _mm_sqrt_ph(a);
22021 let e = _mm_set1_ph(2.0);
22022 assert_eq_m128h(r, e);
22023 }
22024
22025 #[simd_test(enable = "avx512fp16,avx512vl")]
22026 unsafe fn test_mm_mask_sqrt_ph() {
22027 let a = _mm_set1_ph(4.0);
22028 let src = _mm_set1_ph(1.0);
22029 let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
22030 let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
22031 assert_eq_m128h(r, e);
22032 }
22033
22034 #[simd_test(enable = "avx512fp16,avx512vl")]
22035 unsafe fn test_mm_maskz_sqrt_ph() {
22036 let a = _mm_set1_ph(4.0);
22037 let r = _mm_maskz_sqrt_ph(0b01010101, a);
22038 let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
22039 assert_eq_m128h(r, e);
22040 }
22041
22042 #[simd_test(enable = "avx512fp16,avx512vl")]
22043 unsafe fn test_mm256_sqrt_ph() {
22044 let a = _mm256_set1_ph(4.0);
22045 let r = _mm256_sqrt_ph(a);
22046 let e = _mm256_set1_ph(2.0);
22047 assert_eq_m256h(r, e);
22048 }
22049
22050 #[simd_test(enable = "avx512fp16,avx512vl")]
22051 unsafe fn test_mm256_mask_sqrt_ph() {
22052 let a = _mm256_set1_ph(4.0);
22053 let src = _mm256_set1_ph(1.0);
22054 let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
22055 let e = _mm256_set_ph(
22056 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22057 );
22058 assert_eq_m256h(r, e);
22059 }
22060
22061 #[simd_test(enable = "avx512fp16,avx512vl")]
22062 unsafe fn test_mm256_maskz_sqrt_ph() {
22063 let a = _mm256_set1_ph(4.0);
22064 let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
22065 let e = _mm256_set_ph(
22066 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22067 );
22068 assert_eq_m256h(r, e);
22069 }
22070
22071 #[simd_test(enable = "avx512fp16")]
22072 unsafe fn test_mm512_sqrt_ph() {
22073 let a = _mm512_set1_ph(4.0);
22074 let r = _mm512_sqrt_ph(a);
22075 let e = _mm512_set1_ph(2.0);
22076 assert_eq_m512h(r, e);
22077 }
22078
22079 #[simd_test(enable = "avx512fp16")]
22080 unsafe fn test_mm512_mask_sqrt_ph() {
22081 let a = _mm512_set1_ph(4.0);
22082 let src = _mm512_set1_ph(1.0);
22083 let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
22084 let e = _mm512_set_ph(
22085 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
22086 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22087 );
22088 assert_eq_m512h(r, e);
22089 }
22090
22091 #[simd_test(enable = "avx512fp16")]
22092 unsafe fn test_mm512_maskz_sqrt_ph() {
22093 let a = _mm512_set1_ph(4.0);
22094 let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
22095 let e = _mm512_set_ph(
22096 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22097 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22098 );
22099 assert_eq_m512h(r, e);
22100 }
22101
22102 #[simd_test(enable = "avx512fp16")]
22103 unsafe fn test_mm512_sqrt_round_ph() {
22104 let a = _mm512_set1_ph(4.0);
22105 let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
22106 let e = _mm512_set1_ph(2.0);
22107 assert_eq_m512h(r, e);
22108 }
22109
22110 #[simd_test(enable = "avx512fp16")]
22111 unsafe fn test_mm512_mask_sqrt_round_ph() {
22112 let a = _mm512_set1_ph(4.0);
22113 let src = _mm512_set1_ph(1.0);
22114 let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22115 src,
22116 0b01010101010101010101010101010101,
22117 a,
22118 );
22119 let e = _mm512_set_ph(
22120 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
22121 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22122 );
22123 assert_eq_m512h(r, e);
22124 }
22125
22126 #[simd_test(enable = "avx512fp16")]
22127 unsafe fn test_mm512_maskz_sqrt_round_ph() {
22128 let a = _mm512_set1_ph(4.0);
22129 let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22130 0b01010101010101010101010101010101,
22131 a,
22132 );
22133 let e = _mm512_set_ph(
22134 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22135 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22136 );
22137 assert_eq_m512h(r, e);
22138 }
22139
22140 #[simd_test(enable = "avx512fp16")]
22141 unsafe fn test_mm_sqrt_sh() {
22142 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22143 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22144 let r = _mm_sqrt_sh(a, b);
22145 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22146 assert_eq_m128h(r, e);
22147 }
22148
22149 #[simd_test(enable = "avx512fp16")]
22150 unsafe fn test_mm_mask_sqrt_sh() {
22151 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22152 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22153 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22154 let r = _mm_mask_sqrt_sh(src, 0, a, b);
22155 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22156 assert_eq_m128h(r, e);
22157 let r = _mm_mask_sqrt_sh(src, 1, a, b);
22158 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22159 assert_eq_m128h(r, e);
22160 }
22161
22162 #[simd_test(enable = "avx512fp16")]
22163 unsafe fn test_mm_maskz_sqrt_sh() {
22164 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22165 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22166 let r = _mm_maskz_sqrt_sh(0, a, b);
22167 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22168 assert_eq_m128h(r, e);
22169 let r = _mm_maskz_sqrt_sh(1, a, b);
22170 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22171 assert_eq_m128h(r, e);
22172 }
22173
22174 #[simd_test(enable = "avx512fp16")]
22175 unsafe fn test_mm_sqrt_round_sh() {
22176 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22177 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22178 let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22179 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22180 assert_eq_m128h(r, e);
22181 }
22182
22183 #[simd_test(enable = "avx512fp16")]
22184 unsafe fn test_mm_mask_sqrt_round_sh() {
22185 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22186 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22187 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22188 let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22189 src, 0, a, b,
22190 );
22191 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22192 assert_eq_m128h(r, e);
22193 let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22194 src, 1, a, b,
22195 );
22196 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22197 assert_eq_m128h(r, e);
22198 }
22199
22200 #[simd_test(enable = "avx512fp16")]
22201 unsafe fn test_mm_maskz_sqrt_round_sh() {
22202 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22203 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22204 let r =
22205 _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22206 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22207 assert_eq_m128h(r, e);
22208 let r =
22209 _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22210 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22211 assert_eq_m128h(r, e);
22212 }
22213
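    // max picks the larger lane of each pair, so every computed lane here is 2.0; masked-off
    // lanes fall back to src (3.0) or zero, giving the alternating patterns below.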
22214 #[simd_test(enable = "avx512fp16,avx512vl")]
22215 unsafe fn test_mm_max_ph() {
22216 let a = _mm_set1_ph(2.0);
22217 let b = _mm_set1_ph(1.0);
22218 let r = _mm_max_ph(a, b);
22219 let e = _mm_set1_ph(2.0);
22220 assert_eq_m128h(r, e);
22221 }
22222
22223 #[simd_test(enable = "avx512fp16,avx512vl")]
22224 unsafe fn test_mm_mask_max_ph() {
22225 let a = _mm_set1_ph(2.0);
22226 let b = _mm_set1_ph(1.0);
22227 let src = _mm_set1_ph(3.0);
22228 let r = _mm_mask_max_ph(src, 0b01010101, a, b);
22229 let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
22230 assert_eq_m128h(r, e);
22231 }
22232
22233 #[simd_test(enable = "avx512fp16,avx512vl")]
22234 unsafe fn test_mm_maskz_max_ph() {
22235 let a = _mm_set1_ph(2.0);
22236 let b = _mm_set1_ph(1.0);
22237 let r = _mm_maskz_max_ph(0b01010101, a, b);
22238 let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
22239 assert_eq_m128h(r, e);
22240 }
22241
22242 #[simd_test(enable = "avx512fp16,avx512vl")]
22243 unsafe fn test_mm256_max_ph() {
22244 let a = _mm256_set1_ph(2.0);
22245 let b = _mm256_set1_ph(1.0);
22246 let r = _mm256_max_ph(a, b);
22247 let e = _mm256_set1_ph(2.0);
22248 assert_eq_m256h(r, e);
22249 }
22250
22251 #[simd_test(enable = "avx512fp16,avx512vl")]
22252 unsafe fn test_mm256_mask_max_ph() {
22253 let a = _mm256_set1_ph(2.0);
22254 let b = _mm256_set1_ph(1.0);
22255 let src = _mm256_set1_ph(3.0);
22256 let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
22257 let e = _mm256_set_ph(
22258 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22259 );
22260 assert_eq_m256h(r, e);
22261 }
22262
22263 #[simd_test(enable = "avx512fp16,avx512vl")]
22264 unsafe fn test_mm256_maskz_max_ph() {
22265 let a = _mm256_set1_ph(2.0);
22266 let b = _mm256_set1_ph(1.0);
22267 let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
22268 let e = _mm256_set_ph(
22269 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22270 );
22271 assert_eq_m256h(r, e);
22272 }
22273
22274 #[simd_test(enable = "avx512fp16")]
22275 unsafe fn test_mm512_max_ph() {
22276 let a = _mm512_set1_ph(2.0);
22277 let b = _mm512_set1_ph(1.0);
22278 let r = _mm512_max_ph(a, b);
22279 let e = _mm512_set1_ph(2.0);
22280 assert_eq_m512h(r, e);
22281 }
22282
22283 #[simd_test(enable = "avx512fp16")]
22284 unsafe fn test_mm512_mask_max_ph() {
22285 let a = _mm512_set1_ph(2.0);
22286 let b = _mm512_set1_ph(1.0);
22287 let src = _mm512_set1_ph(3.0);
22288 let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
22289 let e = _mm512_set_ph(
22290 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
22291 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22292 );
22293 assert_eq_m512h(r, e);
22294 }
22295
22296 #[simd_test(enable = "avx512fp16")]
22297 unsafe fn test_mm512_maskz_max_ph() {
22298 let a = _mm512_set1_ph(2.0);
22299 let b = _mm512_set1_ph(1.0);
22300 let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
22301 let e = _mm512_set_ph(
22302 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22303 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22304 );
22305 assert_eq_m512h(r, e);
22306 }
22307
22308 #[simd_test(enable = "avx512fp16")]
22309 unsafe fn test_mm512_max_round_ph() {
22310 let a = _mm512_set1_ph(2.0);
22311 let b = _mm512_set1_ph(1.0);
22312 let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22313 let e = _mm512_set1_ph(2.0);
22314 assert_eq_m512h(r, e);
22315 }
22316
22317 #[simd_test(enable = "avx512fp16")]
22318 unsafe fn test_mm512_mask_max_round_ph() {
22319 let a = _mm512_set1_ph(2.0);
22320 let b = _mm512_set1_ph(1.0);
22321 let src = _mm512_set1_ph(3.0);
22322 let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22323 src,
22324 0b01010101010101010101010101010101,
22325 a,
22326 b,
22327 );
22328 let e = _mm512_set_ph(
22329 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
22330 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22331 );
22332 assert_eq_m512h(r, e);
22333 }
22334
22335 #[simd_test(enable = "avx512fp16")]
22336 unsafe fn test_mm512_maskz_max_round_ph() {
22337 let a = _mm512_set1_ph(2.0);
22338 let b = _mm512_set1_ph(1.0);
22339 let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22340 0b01010101010101010101010101010101,
22341 a,
22342 b,
22343 );
22344 let e = _mm512_set_ph(
22345 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22346 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22347 );
22348 assert_eq_m512h(r, e);
22349 }
22350
22351 #[simd_test(enable = "avx512fp16")]
22352 unsafe fn test_mm_max_sh() {
22353 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22354 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22355 let r = _mm_max_sh(a, b);
22356 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22357 assert_eq_m128h(r, e);
22358 }
22359
22360 #[simd_test(enable = "avx512fp16")]
22361 unsafe fn test_mm_mask_max_sh() {
22362 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22363 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22364 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22365 let r = _mm_mask_max_sh(src, 0, a, b);
22366 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22367 assert_eq_m128h(r, e);
22368 let r = _mm_mask_max_sh(src, 1, a, b);
22369 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22370 assert_eq_m128h(r, e);
22371 }
22372
22373 #[simd_test(enable = "avx512fp16")]
22374 unsafe fn test_mm_maskz_max_sh() {
22375 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22376 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22377 let r = _mm_maskz_max_sh(0, a, b);
22378 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22379 assert_eq_m128h(r, e);
22380 let r = _mm_maskz_max_sh(1, a, b);
22381 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22382 assert_eq_m128h(r, e);
22383 }
22384
22385 #[simd_test(enable = "avx512fp16")]
22386 unsafe fn test_mm_max_round_sh() {
22387 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22388 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22389 let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22390 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22391 assert_eq_m128h(r, e);
22392 }
22393
22394 #[simd_test(enable = "avx512fp16")]
22395 unsafe fn test_mm_mask_max_round_sh() {
22396 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22397 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22398 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22399 let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22400 src, 0, a, b,
22401 );
22402 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22403 assert_eq_m128h(r, e);
22404 let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22405 src, 1, a, b,
22406 );
22407 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22408 assert_eq_m128h(r, e);
22409 }
22410
22411 #[simd_test(enable = "avx512fp16")]
22412 unsafe fn test_mm_maskz_max_round_sh() {
22413 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22414 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22415 let r =
22416 _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22417 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22418 assert_eq_m128h(r, e);
22419 let r =
22420 _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22421 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22422 assert_eq_m128h(r, e);
22423 }
22424
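    // min mirrors the max tests above, with the smaller operand (1.0) as the computed value.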
22425 #[simd_test(enable = "avx512fp16,avx512vl")]
22426 unsafe fn test_mm_min_ph() {
22427 let a = _mm_set1_ph(2.0);
22428 let b = _mm_set1_ph(1.0);
22429 let r = _mm_min_ph(a, b);
22430 let e = _mm_set1_ph(1.0);
22431 assert_eq_m128h(r, e);
22432 }
22433
22434 #[simd_test(enable = "avx512fp16,avx512vl")]
22435 unsafe fn test_mm_mask_min_ph() {
22436 let a = _mm_set1_ph(2.0);
22437 let b = _mm_set1_ph(1.0);
22438 let src = _mm_set1_ph(3.0);
22439 let r = _mm_mask_min_ph(src, 0b01010101, a, b);
22440 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
22441 assert_eq_m128h(r, e);
22442 }
22443
22444 #[simd_test(enable = "avx512fp16,avx512vl")]
22445 unsafe fn test_mm_maskz_min_ph() {
22446 let a = _mm_set1_ph(2.0);
22447 let b = _mm_set1_ph(1.0);
22448 let r = _mm_maskz_min_ph(0b01010101, a, b);
22449 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
22450 assert_eq_m128h(r, e);
22451 }
22452
22453 #[simd_test(enable = "avx512fp16,avx512vl")]
22454 unsafe fn test_mm256_min_ph() {
22455 let a = _mm256_set1_ph(2.0);
22456 let b = _mm256_set1_ph(1.0);
22457 let r = _mm256_min_ph(a, b);
22458 let e = _mm256_set1_ph(1.0);
22459 assert_eq_m256h(r, e);
22460 }
22461
22462 #[simd_test(enable = "avx512fp16,avx512vl")]
22463 unsafe fn test_mm256_mask_min_ph() {
22464 let a = _mm256_set1_ph(2.0);
22465 let b = _mm256_set1_ph(1.0);
22466 let src = _mm256_set1_ph(3.0);
22467 let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
22468 let e = _mm256_set_ph(
22469 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22470 );
22471 assert_eq_m256h(r, e);
22472 }
22473
22474 #[simd_test(enable = "avx512fp16,avx512vl")]
22475 unsafe fn test_mm256_maskz_min_ph() {
22476 let a = _mm256_set1_ph(2.0);
22477 let b = _mm256_set1_ph(1.0);
22478 let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
22479 let e = _mm256_set_ph(
22480 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22481 );
22482 assert_eq_m256h(r, e);
22483 }
22484
22485 #[simd_test(enable = "avx512fp16")]
22486 unsafe fn test_mm512_min_ph() {
22487 let a = _mm512_set1_ph(2.0);
22488 let b = _mm512_set1_ph(1.0);
22489 let r = _mm512_min_ph(a, b);
22490 let e = _mm512_set1_ph(1.0);
22491 assert_eq_m512h(r, e);
22492 }
22493
22494 #[simd_test(enable = "avx512fp16")]
22495 unsafe fn test_mm512_mask_min_ph() {
22496 let a = _mm512_set1_ph(2.0);
22497 let b = _mm512_set1_ph(1.0);
22498 let src = _mm512_set1_ph(3.0);
22499 let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
22500 let e = _mm512_set_ph(
22501 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
22502 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22503 );
22504 assert_eq_m512h(r, e);
22505 }
22506
22507 #[simd_test(enable = "avx512fp16")]
22508 unsafe fn test_mm512_maskz_min_ph() {
22509 let a = _mm512_set1_ph(2.0);
22510 let b = _mm512_set1_ph(1.0);
22511 let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
22512 let e = _mm512_set_ph(
22513 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22514 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22515 );
22516 assert_eq_m512h(r, e);
22517 }
22518
22519 #[simd_test(enable = "avx512fp16")]
22520 unsafe fn test_mm512_min_round_ph() {
22521 let a = _mm512_set1_ph(2.0);
22522 let b = _mm512_set1_ph(1.0);
22523 let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22524 let e = _mm512_set1_ph(1.0);
22525 assert_eq_m512h(r, e);
22526 }
22527
22528 #[simd_test(enable = "avx512fp16")]
22529 unsafe fn test_mm512_mask_min_round_ph() {
22530 let a = _mm512_set1_ph(2.0);
22531 let b = _mm512_set1_ph(1.0);
22532 let src = _mm512_set1_ph(3.0);
22533 let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22534 src,
22535 0b01010101010101010101010101010101,
22536 a,
22537 b,
22538 );
22539 let e = _mm512_set_ph(
22540 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
22541 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22542 );
22543 assert_eq_m512h(r, e);
22544 }
22545
22546 #[simd_test(enable = "avx512fp16")]
22547 unsafe fn test_mm512_maskz_min_round_ph() {
22548 let a = _mm512_set1_ph(2.0);
22549 let b = _mm512_set1_ph(1.0);
22550 let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22551 0b01010101010101010101010101010101,
22552 a,
22553 b,
22554 );
22555 let e = _mm512_set_ph(
22556 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22557 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22558 );
22559 assert_eq_m512h(r, e);
22560 }
22561
22562 #[simd_test(enable = "avx512fp16")]
22563 unsafe fn test_mm_min_sh() {
22564 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22565 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22566 let r = _mm_min_sh(a, b);
22567 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22568 assert_eq_m128h(r, e);
22569 }
22570
22571 #[simd_test(enable = "avx512fp16")]
22572 unsafe fn test_mm_mask_min_sh() {
22573 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22574 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22575 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22576 let r = _mm_mask_min_sh(src, 0, a, b);
22577 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22578 assert_eq_m128h(r, e);
22579 let r = _mm_mask_min_sh(src, 1, a, b);
22580 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22581 assert_eq_m128h(r, e);
22582 }
22583
22584 #[simd_test(enable = "avx512fp16")]
22585 unsafe fn test_mm_maskz_min_sh() {
22586 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22587 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22588 let r = _mm_maskz_min_sh(0, a, b);
22589 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22590 assert_eq_m128h(r, e);
22591 let r = _mm_maskz_min_sh(1, a, b);
22592 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22593 assert_eq_m128h(r, e);
22594 }
22595
22596 #[simd_test(enable = "avx512fp16")]
22597 unsafe fn test_mm_min_round_sh() {
22598 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22599 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22600 let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22601 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22602 assert_eq_m128h(r, e);
22603 }
22604
22605 #[simd_test(enable = "avx512fp16")]
22606 unsafe fn test_mm_mask_min_round_sh() {
22607 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22608 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22609 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22610 let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22611 src, 0, a, b,
22612 );
22613 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22614 assert_eq_m128h(r, e);
22615 let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22616 src, 1, a, b,
22617 );
22618 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22619 assert_eq_m128h(r, e);
22620 }
22621
22622 #[simd_test(enable = "avx512fp16")]
22623 unsafe fn test_mm_maskz_min_round_sh() {
22624 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22625 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22626 let r =
22627 _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22628 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22629 assert_eq_m128h(r, e);
22630 let r =
22631 _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22632 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22633 assert_eq_m128h(r, e);
22634 }
22635
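    // getexp returns the exponent of each value as a float, i.e. floor(log2(|x|));
    // 3.0 = 1.5 * 2^1, so getexp(3.0) = 1.0.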
22636 #[simd_test(enable = "avx512fp16,avx512vl")]
22637 unsafe fn test_mm_getexp_ph() {
22638 let a = _mm_set1_ph(3.0);
22639 let r = _mm_getexp_ph(a);
22640 let e = _mm_set1_ph(1.0);
22641 assert_eq_m128h(r, e);
22642 }
22643
22644 #[simd_test(enable = "avx512fp16,avx512vl")]
22645 unsafe fn test_mm_mask_getexp_ph() {
22646 let a = _mm_set1_ph(3.0);
22647 let src = _mm_set1_ph(4.0);
22648 let r = _mm_mask_getexp_ph(src, 0b01010101, a);
22649 let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
22650 assert_eq_m128h(r, e);
22651 }
22652
22653 #[simd_test(enable = "avx512fp16,avx512vl")]
22654 unsafe fn test_mm_maskz_getexp_ph() {
22655 let a = _mm_set1_ph(3.0);
22656 let r = _mm_maskz_getexp_ph(0b01010101, a);
22657 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
22658 assert_eq_m128h(r, e);
22659 }
22660
22661 #[simd_test(enable = "avx512fp16,avx512vl")]
22662 unsafe fn test_mm256_getexp_ph() {
22663 let a = _mm256_set1_ph(3.0);
22664 let r = _mm256_getexp_ph(a);
22665 let e = _mm256_set1_ph(1.0);
22666 assert_eq_m256h(r, e);
22667 }
22668
22669 #[simd_test(enable = "avx512fp16,avx512vl")]
22670 unsafe fn test_mm256_mask_getexp_ph() {
22671 let a = _mm256_set1_ph(3.0);
22672 let src = _mm256_set1_ph(4.0);
22673 let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
22674 let e = _mm256_set_ph(
22675 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22676 );
22677 assert_eq_m256h(r, e);
22678 }
22679
22680 #[simd_test(enable = "avx512fp16,avx512vl")]
22681 unsafe fn test_mm256_maskz_getexp_ph() {
22682 let a = _mm256_set1_ph(3.0);
22683 let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
22684 let e = _mm256_set_ph(
22685 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22686 );
22687 assert_eq_m256h(r, e);
22688 }
22689
22690 #[simd_test(enable = "avx512fp16")]
22691 unsafe fn test_mm512_getexp_ph() {
22692 let a = _mm512_set1_ph(3.0);
22693 let r = _mm512_getexp_ph(a);
22694 let e = _mm512_set1_ph(1.0);
22695 assert_eq_m512h(r, e);
22696 }
22697
22698 #[simd_test(enable = "avx512fp16")]
22699 unsafe fn test_mm512_mask_getexp_ph() {
22700 let a = _mm512_set1_ph(3.0);
22701 let src = _mm512_set1_ph(4.0);
22702 let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
22703 let e = _mm512_set_ph(
22704 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
22705 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22706 );
22707 assert_eq_m512h(r, e);
22708 }
22709
22710 #[simd_test(enable = "avx512fp16")]
22711 unsafe fn test_mm512_maskz_getexp_ph() {
22712 let a = _mm512_set1_ph(3.0);
22713 let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
22714 let e = _mm512_set_ph(
22715 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22716 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22717 );
22718 assert_eq_m512h(r, e);
22719 }
22720
22721 #[simd_test(enable = "avx512fp16")]
22722 unsafe fn test_mm512_getexp_round_ph() {
22723 let a = _mm512_set1_ph(3.0);
22724 let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
22725 let e = _mm512_set1_ph(1.0);
22726 assert_eq_m512h(r, e);
22727 }
22728
22729 #[simd_test(enable = "avx512fp16")]
22730 unsafe fn test_mm512_mask_getexp_round_ph() {
22731 let a = _mm512_set1_ph(3.0);
22732 let src = _mm512_set1_ph(4.0);
22733 let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22734 src,
22735 0b01010101010101010101010101010101,
22736 a,
22737 );
22738 let e = _mm512_set_ph(
22739 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
22740 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22741 );
22742 assert_eq_m512h(r, e);
22743 }
22744
22745 #[simd_test(enable = "avx512fp16")]
22746 unsafe fn test_mm512_maskz_getexp_round_ph() {
22747 let a = _mm512_set1_ph(3.0);
22748 let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22749 0b01010101010101010101010101010101,
22750 a,
22751 );
22752 let e = _mm512_set_ph(
22753 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22754 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22755 );
22756 assert_eq_m512h(r, e);
22757 }
22758
22759 #[simd_test(enable = "avx512fp16")]
22760 unsafe fn test_mm_getexp_sh() {
22761 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22762 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22763 let r = _mm_getexp_sh(a, b);
22764 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22765 assert_eq_m128h(r, e);
22766 }
22767
22768 #[simd_test(enable = "avx512fp16")]
22769 unsafe fn test_mm_mask_getexp_sh() {
22770 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22771 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22772 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
22773 let r = _mm_mask_getexp_sh(src, 0, a, b);
22774 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22775 assert_eq_m128h(r, e);
22776 let r = _mm_mask_getexp_sh(src, 1, a, b);
22777 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22778 assert_eq_m128h(r, e);
22779 }
22780
22781 #[simd_test(enable = "avx512fp16")]
22782 unsafe fn test_mm_maskz_getexp_sh() {
22783 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22784 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22785 let r = _mm_maskz_getexp_sh(0, a, b);
22786 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22787 assert_eq_m128h(r, e);
22788 let r = _mm_maskz_getexp_sh(1, a, b);
22789 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22790 assert_eq_m128h(r, e);
22791 }
22792
22793 #[simd_test(enable = "avx512fp16")]
22794 unsafe fn test_mm_getexp_round_sh() {
22795 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22796 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22797 let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
22798 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22799 assert_eq_m128h(r, e);
22800 }
22801
22802 #[simd_test(enable = "avx512fp16")]
22803 unsafe fn test_mm_mask_getexp_round_sh() {
22804 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22805 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22806 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
22807 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
22808 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22809 assert_eq_m128h(r, e);
22810 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
22811 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22812 assert_eq_m128h(r, e);
22813 }
22814
22815 #[simd_test(enable = "avx512fp16")]
22816 unsafe fn test_mm_maskz_getexp_round_sh() {
22817 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22818 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22819 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
22820 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22821 assert_eq_m128h(r, e);
22822 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
22823 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22824 assert_eq_m128h(r, e);
22825 }
22826
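    // getmant extracts the mantissa normalized into the interval selected by the first
    // const: with _MM_MANT_NORM_P75_1P5 the result lies in [0.75, 1.5), and 10.0 =
    // 1.25 * 2^3 gives 1.25. _MM_MANT_SIGN_NAN is documented to return NaN for negative
    // inputs, which never triggers with these all-positive test values.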
22827 #[simd_test(enable = "avx512fp16,avx512vl")]
22828 unsafe fn test_mm_getmant_ph() {
22829 let a = _mm_set1_ph(10.0);
22830 let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22831 let e = _mm_set1_ph(1.25);
22832 assert_eq_m128h(r, e);
22833 }
22834
22835 #[simd_test(enable = "avx512fp16,avx512vl")]
22836 unsafe fn test_mm_mask_getmant_ph() {
22837 let a = _mm_set1_ph(10.0);
22838 let src = _mm_set1_ph(20.0);
22839 let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
22840 let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
22841 assert_eq_m128h(r, e);
22842 }
22843
22844 #[simd_test(enable = "avx512fp16,avx512vl")]
22845 unsafe fn test_mm_maskz_getmant_ph() {
22846 let a = _mm_set1_ph(10.0);
22847 let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
22848 let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
22849 assert_eq_m128h(r, e);
22850 }
22851
22852 #[simd_test(enable = "avx512fp16,avx512vl")]
22853 unsafe fn test_mm256_getmant_ph() {
22854 let a = _mm256_set1_ph(10.0);
22855 let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22856 let e = _mm256_set1_ph(1.25);
22857 assert_eq_m256h(r, e);
22858 }
22859
22860 #[simd_test(enable = "avx512fp16,avx512vl")]
22861 unsafe fn test_mm256_mask_getmant_ph() {
22862 let a = _mm256_set1_ph(10.0);
22863 let src = _mm256_set1_ph(20.0);
22864 let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22865 src,
22866 0b0101010101010101,
22867 a,
22868 );
22869 let e = _mm256_set_ph(
22870 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22871 20.0, 1.25,
22872 );
22873 assert_eq_m256h(r, e);
22874 }
22875
22876 #[simd_test(enable = "avx512fp16,avx512vl")]
22877 unsafe fn test_mm256_maskz_getmant_ph() {
22878 let a = _mm256_set1_ph(10.0);
22879 let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22880 0b0101010101010101,
22881 a,
22882 );
22883 let e = _mm256_set_ph(
22884 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22885 );
22886 assert_eq_m256h(r, e);
22887 }
22888
22889 #[simd_test(enable = "avx512fp16")]
22890 unsafe fn test_mm512_getmant_ph() {
22891 let a = _mm512_set1_ph(10.0);
22892 let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22893 let e = _mm512_set1_ph(1.25);
22894 assert_eq_m512h(r, e);
22895 }
22896
22897 #[simd_test(enable = "avx512fp16")]
22898 unsafe fn test_mm512_mask_getmant_ph() {
22899 let a = _mm512_set1_ph(10.0);
22900 let src = _mm512_set1_ph(20.0);
22901 let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22902 src,
22903 0b01010101010101010101010101010101,
22904 a,
22905 );
22906 let e = _mm512_set_ph(
22907 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22908 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22909 20.0, 1.25, 20.0, 1.25,
22910 );
22911 assert_eq_m512h(r, e);
22912 }
22913
22914 #[simd_test(enable = "avx512fp16")]
22915 unsafe fn test_mm512_maskz_getmant_ph() {
22916 let a = _mm512_set1_ph(10.0);
22917 let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22918 0b01010101010101010101010101010101,
22919 a,
22920 );
22921 let e = _mm512_set_ph(
22922 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22923 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22924 );
22925 assert_eq_m512h(r, e);
22926 }
22927
22928 #[simd_test(enable = "avx512fp16")]
22929 unsafe fn test_mm512_getmant_round_ph() {
22930 let a = _mm512_set1_ph(10.0);
22931 let r =
22932 _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
22933 a,
22934 );
22935 let e = _mm512_set1_ph(1.25);
22936 assert_eq_m512h(r, e);
22937 }
22938
22939 #[simd_test(enable = "avx512fp16")]
22940 unsafe fn test_mm512_mask_getmant_round_ph() {
22941 let a = _mm512_set1_ph(10.0);
22942 let src = _mm512_set1_ph(20.0);
22943 let r = _mm512_mask_getmant_round_ph::<
22944 _MM_MANT_NORM_P75_1P5,
22945 _MM_MANT_SIGN_NAN,
22946 _MM_FROUND_NO_EXC,
22947 >(src, 0b01010101010101010101010101010101, a);
22948 let e = _mm512_set_ph(
22949 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22950 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22951 20.0, 1.25, 20.0, 1.25,
22952 );
22953 assert_eq_m512h(r, e);
22954 }
22955
22956 #[simd_test(enable = "avx512fp16")]
22957 unsafe fn test_mm512_maskz_getmant_round_ph() {
22958 let a = _mm512_set1_ph(10.0);
22959 let r = _mm512_maskz_getmant_round_ph::<
22960 _MM_MANT_NORM_P75_1P5,
22961 _MM_MANT_SIGN_NAN,
22962 _MM_FROUND_NO_EXC,
22963 >(0b01010101010101010101010101010101, a);
22964 let e = _mm512_set_ph(
22965 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22966 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22967 );
22968 assert_eq_m512h(r, e);
22969 }
22970
22971 #[simd_test(enable = "avx512fp16")]
22972 unsafe fn test_mm_getmant_sh() {
22973 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22974 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22975 let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
22976 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
22977 assert_eq_m128h(r, e);
22978 }
22979
22980 #[simd_test(enable = "avx512fp16")]
22981 unsafe fn test_mm_mask_getmant_sh() {
22982 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22983 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22984 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
22985 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
22986 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
22987 assert_eq_m128h(r, e);
22988 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
22989 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
22990 assert_eq_m128h(r, e);
22991 }
22992
22993 #[simd_test(enable = "avx512fp16")]
22994 unsafe fn test_mm_maskz_getmant_sh() {
22995 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22996 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22997 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
22998 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22999 assert_eq_m128h(r, e);
23000 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
23001 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23002 assert_eq_m128h(r, e);
23003 }
23004
23005 #[simd_test(enable = "avx512fp16")]
23006 unsafe fn test_mm_getmant_round_sh() {
23007 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23008 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23009 let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23010 a, b,
23011 );
23012 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23013 assert_eq_m128h(r, e);
23014 }
23015
23016 #[simd_test(enable = "avx512fp16")]
23017 unsafe fn test_mm_mask_getmant_round_sh() {
23018 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23019 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23020 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
23021 let r = _mm_mask_getmant_round_sh::<
23022 _MM_MANT_NORM_P75_1P5,
23023 _MM_MANT_SIGN_NAN,
23024 _MM_FROUND_NO_EXC,
23025 >(src, 0, a, b);
23026 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
23027 assert_eq_m128h(r, e);
23028 let r = _mm_mask_getmant_round_sh::<
23029 _MM_MANT_NORM_P75_1P5,
23030 _MM_MANT_SIGN_NAN,
23031 _MM_FROUND_NO_EXC,
23032 >(src, 1, a, b);
23033 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23034 assert_eq_m128h(r, e);
23035 }
23036
23037 #[simd_test(enable = "avx512fp16")]
23038 unsafe fn test_mm_maskz_getmant_round_sh() {
23039 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23040 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23041 let r = _mm_maskz_getmant_round_sh::<
23042 _MM_MANT_NORM_P75_1P5,
23043 _MM_MANT_SIGN_NAN,
23044 _MM_FROUND_NO_EXC,
23045 >(0, a, b);
23046 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23047 assert_eq_m128h(r, e);
23048 let r = _mm_maskz_getmant_round_sh::<
23049 _MM_MANT_NORM_P75_1P5,
23050 _MM_MANT_SIGN_NAN,
23051 _MM_FROUND_NO_EXC,
23052 >(1, a, b);
23053 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23054 assert_eq_m128h(r, e);
23055 }
23056
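    // roundscale::<IMM8> rounds each lane to a multiple of 2^-M, where M is IMM8[7:4];
    // IMM8 = 0 keeps zero fraction bits and uses round-to-nearest-even, so 1.1 rounds to 1.0.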
23057 #[simd_test(enable = "avx512fp16,avx512vl")]
23058 unsafe fn test_mm_roundscale_ph() {
23059 let a = _mm_set1_ph(1.1);
23060 let r = _mm_roundscale_ph::<0>(a);
23061 let e = _mm_set1_ph(1.0);
23062 assert_eq_m128h(r, e);
23063 }
23064
23065 #[simd_test(enable = "avx512fp16,avx512vl")]
23066 unsafe fn test_mm_mask_roundscale_ph() {
23067 let a = _mm_set1_ph(1.1);
23068 let src = _mm_set1_ph(2.0);
23069 let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
23070 let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
23071 assert_eq_m128h(r, e);
23072 }
23073
23074 #[simd_test(enable = "avx512fp16,avx512vl")]
23075 unsafe fn test_mm_maskz_roundscale_ph() {
23076 let a = _mm_set1_ph(1.1);
23077 let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
23078 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
23079 assert_eq_m128h(r, e);
23080 }
23081
23082 #[simd_test(enable = "avx512fp16,avx512vl")]
23083 unsafe fn test_mm256_roundscale_ph() {
23084 let a = _mm256_set1_ph(1.1);
23085 let r = _mm256_roundscale_ph::<0>(a);
23086 let e = _mm256_set1_ph(1.0);
23087 assert_eq_m256h(r, e);
23088 }
23089
23090 #[simd_test(enable = "avx512fp16,avx512vl")]
23091 unsafe fn test_mm256_mask_roundscale_ph() {
23092 let a = _mm256_set1_ph(1.1);
23093 let src = _mm256_set1_ph(2.0);
23094 let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
23095 let e = _mm256_set_ph(
23096 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23097 );
23098 assert_eq_m256h(r, e);
23099 }
23100
23101 #[simd_test(enable = "avx512fp16,avx512vl")]
23102 unsafe fn test_mm256_maskz_roundscale_ph() {
23103 let a = _mm256_set1_ph(1.1);
23104 let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
23105 let e = _mm256_set_ph(
23106 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23107 );
23108 assert_eq_m256h(r, e);
23109 }
23110
23111 #[simd_test(enable = "avx512fp16")]
23112 unsafe fn test_mm512_roundscale_ph() {
23113 let a = _mm512_set1_ph(1.1);
23114 let r = _mm512_roundscale_ph::<0>(a);
23115 let e = _mm512_set1_ph(1.0);
23116 assert_eq_m512h(r, e);
23117 }
23118
23119 #[simd_test(enable = "avx512fp16")]
23120 unsafe fn test_mm512_mask_roundscale_ph() {
23121 let a = _mm512_set1_ph(1.1);
23122 let src = _mm512_set1_ph(2.0);
23123 let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
23124 let e = _mm512_set_ph(
23125 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23126 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23127 );
23128 assert_eq_m512h(r, e);
23129 }
23130
23131 #[simd_test(enable = "avx512fp16")]
23132 unsafe fn test_mm512_maskz_roundscale_ph() {
23133 let a = _mm512_set1_ph(1.1);
23134 let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
23135 let e = _mm512_set_ph(
23136 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23137 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23138 );
23139 assert_eq_m512h(r, e);
23140 }
23141
23142 #[simd_test(enable = "avx512fp16")]
23143 unsafe fn test_mm512_roundscale_round_ph() {
23144 let a = _mm512_set1_ph(1.1);
23145 let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
23146 let e = _mm512_set1_ph(1.0);
23147 assert_eq_m512h(r, e);
23148 }
23149
23150 #[simd_test(enable = "avx512fp16")]
23151 unsafe fn test_mm512_mask_roundscale_round_ph() {
23152 let a = _mm512_set1_ph(1.1);
23153 let src = _mm512_set1_ph(2.0);
23154 let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23155 src,
23156 0b01010101010101010101010101010101,
23157 a,
23158 );
23159 let e = _mm512_set_ph(
23160 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23161 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23162 );
23163 assert_eq_m512h(r, e);
23164 }
23165
23166 #[simd_test(enable = "avx512fp16")]
23167 unsafe fn test_mm512_maskz_roundscale_round_ph() {
23168 let a = _mm512_set1_ph(1.1);
23169 let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23170 0b01010101010101010101010101010101,
23171 a,
23172 );
23173 let e = _mm512_set_ph(
23174 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23175 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23176 );
23177 assert_eq_m512h(r, e);
23178 }
23179
23180 #[simd_test(enable = "avx512fp16")]
23181 unsafe fn test_mm_roundscale_sh() {
23182 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23183 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23184 let r = _mm_roundscale_sh::<0>(a, b);
23185 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23186 assert_eq_m128h(r, e);
23187 }
23188
23189 #[simd_test(enable = "avx512fp16")]
23190 unsafe fn test_mm_mask_roundscale_sh() {
23191 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23192 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23193 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23194 let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
23195 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23196 assert_eq_m128h(r, e);
23197 let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
23198 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23199 assert_eq_m128h(r, e);
23200 }
23201
23202 #[simd_test(enable = "avx512fp16")]
23203 unsafe fn test_mm_maskz_roundscale_sh() {
23204 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23205 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23206 let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
23207 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23208 assert_eq_m128h(r, e);
23209 let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
23210 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23211 assert_eq_m128h(r, e);
23212 }
23213
23214 #[simd_test(enable = "avx512fp16")]
23215 unsafe fn test_mm_roundscale_round_sh() {
23216 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23217 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23218 let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
23219 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23220 assert_eq_m128h(r, e);
23221 }
23222
23223 #[simd_test(enable = "avx512fp16")]
23224 unsafe fn test_mm_mask_roundscale_round_sh() {
23225 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23226 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23227 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23228 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
23229 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23230 assert_eq_m128h(r, e);
23231 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
23232 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23233 assert_eq_m128h(r, e);
23234 }
23235
23236 #[simd_test(enable = "avx512fp16")]
23237 unsafe fn test_mm_maskz_roundscale_round_sh() {
23238 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23239 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23240 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
23241 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23242 assert_eq_m128h(r, e);
23243 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
23244 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23245 assert_eq_m128h(r, e);
23246 }
23247
23248 #[simd_test(enable = "avx512fp16,avx512vl")]
23249 unsafe fn test_mm_scalef_ph() {
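        // scalef computes a * 2^floor(b) per lane, so 1.0 * 2^3 = 8.0 here.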
23250 let a = _mm_set1_ph(1.);
23251 let b = _mm_set1_ph(3.);
23252 let r = _mm_scalef_ph(a, b);
23253 let e = _mm_set1_ph(8.0);
23254 assert_eq_m128h(r, e);
23255 }
23256
23257 #[simd_test(enable = "avx512fp16,avx512vl")]
23258 unsafe fn test_mm_mask_scalef_ph() {
23259 let a = _mm_set1_ph(1.);
23260 let b = _mm_set1_ph(3.);
23261 let src = _mm_set1_ph(2.);
23262 let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
23263 let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
23264 assert_eq_m128h(r, e);
23265 }
23266
23267 #[simd_test(enable = "avx512fp16,avx512vl")]
23268 unsafe fn test_mm_maskz_scalef_ph() {
23269 let a = _mm_set1_ph(1.);
23270 let b = _mm_set1_ph(3.);
23271 let r = _mm_maskz_scalef_ph(0b01010101, a, b);
23272 let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
23273 assert_eq_m128h(r, e);
23274 }
23275
23276 #[simd_test(enable = "avx512fp16,avx512vl")]
23277 unsafe fn test_mm256_scalef_ph() {
23278 let a = _mm256_set1_ph(1.);
23279 let b = _mm256_set1_ph(3.);
23280 let r = _mm256_scalef_ph(a, b);
23281 let e = _mm256_set1_ph(8.0);
23282 assert_eq_m256h(r, e);
23283 }
23284
23285 #[simd_test(enable = "avx512fp16,avx512vl")]
23286 unsafe fn test_mm256_mask_scalef_ph() {
23287 let a = _mm256_set1_ph(1.);
23288 let b = _mm256_set1_ph(3.);
23289 let src = _mm256_set1_ph(2.);
23290 let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
23291 let e = _mm256_set_ph(
23292 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23293 );
23294 assert_eq_m256h(r, e);
23295 }
23296
23297 #[simd_test(enable = "avx512fp16,avx512vl")]
23298 unsafe fn test_mm256_maskz_scalef_ph() {
23299 let a = _mm256_set1_ph(1.);
23300 let b = _mm256_set1_ph(3.);
23301 let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
23302 let e = _mm256_set_ph(
23303 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23304 );
23305 assert_eq_m256h(r, e);
23306 }
23307
23308 #[simd_test(enable = "avx512fp16")]
23309 unsafe fn test_mm512_scalef_ph() {
23310 let a = _mm512_set1_ph(1.);
23311 let b = _mm512_set1_ph(3.);
23312 let r = _mm512_scalef_ph(a, b);
23313 let e = _mm512_set1_ph(8.0);
23314 assert_eq_m512h(r, e);
23315 }
23316
23317 #[simd_test(enable = "avx512fp16")]
23318 unsafe fn test_mm512_mask_scalef_ph() {
23319 let a = _mm512_set1_ph(1.);
23320 let b = _mm512_set1_ph(3.);
23321 let src = _mm512_set1_ph(2.);
23322 let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
23323 let e = _mm512_set_ph(
23324 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23325 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23326 );
23327 assert_eq_m512h(r, e);
23328 }
23329
23330 #[simd_test(enable = "avx512fp16")]
23331 unsafe fn test_mm512_maskz_scalef_ph() {
23332 let a = _mm512_set1_ph(1.);
23333 let b = _mm512_set1_ph(3.);
23334 let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
23335 let e = _mm512_set_ph(
23336 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23337 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23338 );
23339 assert_eq_m512h(r, e);
23340 }
23341
23342 #[simd_test(enable = "avx512fp16")]
23343 unsafe fn test_mm512_scalef_round_ph() {
23344 let a = _mm512_set1_ph(1.);
23345 let b = _mm512_set1_ph(3.);
23346 let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23347 let e = _mm512_set1_ph(8.0);
23348 assert_eq_m512h(r, e);
23349 }
23350
23351 #[simd_test(enable = "avx512fp16")]
23352 unsafe fn test_mm512_mask_scalef_round_ph() {
23353 let a = _mm512_set1_ph(1.);
23354 let b = _mm512_set1_ph(3.);
23355 let src = _mm512_set1_ph(2.);
23356 let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23357 src,
23358 0b01010101010101010101010101010101,
23359 a,
23360 b,
23361 );
23362 let e = _mm512_set_ph(
23363 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23364 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23365 );
23366 assert_eq_m512h(r, e);
23367 }
23368
23369 #[simd_test(enable = "avx512fp16")]
23370 unsafe fn test_mm512_maskz_scalef_round_ph() {
23371 let a = _mm512_set1_ph(1.);
23372 let b = _mm512_set1_ph(3.);
23373 let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23374 0b01010101010101010101010101010101,
23375 a,
23376 b,
23377 );
23378 let e = _mm512_set_ph(
23379 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23380 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23381 );
23382 assert_eq_m512h(r, e);
23383 }
23384
23385 #[simd_test(enable = "avx512fp16")]
23386 unsafe fn test_mm_scalef_sh() {
23387 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23388 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23389 let r = _mm_scalef_sh(a, b);
23390 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23391 assert_eq_m128h(r, e);
23392 }
23393
23394 #[simd_test(enable = "avx512fp16")]
23395 unsafe fn test_mm_mask_scalef_sh() {
23396 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23397 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23398 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23399 let r = _mm_mask_scalef_sh(src, 0, a, b);
23400 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23401 assert_eq_m128h(r, e);
23402 let r = _mm_mask_scalef_sh(src, 1, a, b);
23403 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23404 assert_eq_m128h(r, e);
23405 }
23406
23407 #[simd_test(enable = "avx512fp16")]
23408 unsafe fn test_mm_maskz_scalef_sh() {
23409 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23410 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23411 let r = _mm_maskz_scalef_sh(0, a, b);
23412 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23413 assert_eq_m128h(r, e);
23414 let r = _mm_maskz_scalef_sh(1, a, b);
23415 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23416 assert_eq_m128h(r, e);
23417 }
23418
23419 #[simd_test(enable = "avx512fp16")]
23420 unsafe fn test_mm_scalef_round_sh() {
23421 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23422 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23423 let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23424 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23425 assert_eq_m128h(r, e);
23426 }
23427
23428 #[simd_test(enable = "avx512fp16")]
23429 unsafe fn test_mm_mask_scalef_round_sh() {
23430 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23431 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23432 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23433 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23434 src, 0, a, b,
23435 );
23436 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23437 assert_eq_m128h(r, e);
23438 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23439 src, 1, a, b,
23440 );
23441 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23442 assert_eq_m128h(r, e);
23443 }
23444
23445 #[simd_test(enable = "avx512fp16")]
23446 unsafe fn test_mm_maskz_scalef_round_sh() {
23447 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23448 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23449 let r =
23450 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
23451 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23452 assert_eq_m128h(r, e);
23453 let r =
23454 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
23455 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23456 assert_eq_m128h(r, e);
23457 }
23458
23459 #[simd_test(enable = "avx512fp16,avx512vl")]
23460 unsafe fn test_mm_reduce_ph() {
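        // With IMM8 = 16 | _MM_FROUND_TO_ZERO the reduction keeps one fraction bit and rounds
        // toward zero, leaving 1.25 - 1.0 = 0.25 in each lane.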
23461 let a = _mm_set1_ph(1.25);
23462 let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23463 let e = _mm_set1_ph(0.25);
23464 assert_eq_m128h(r, e);
23465 }
23466
23467 #[simd_test(enable = "avx512fp16,avx512vl")]
23468 unsafe fn test_mm_mask_reduce_ph() {
23469 let a = _mm_set1_ph(1.25);
23470 let src = _mm_set1_ph(2.0);
23471 let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
23472 let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
23473 assert_eq_m128h(r, e);
23474 }
23475
23476 #[simd_test(enable = "avx512fp16,avx512vl")]
23477 unsafe fn test_mm_maskz_reduce_ph() {
23478 let a = _mm_set1_ph(1.25);
23479 let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
23480 let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
23481 assert_eq_m128h(r, e);
23482 }
23483
23484 #[simd_test(enable = "avx512fp16,avx512vl")]
23485 unsafe fn test_mm256_reduce_ph() {
23486 let a = _mm256_set1_ph(1.25);
23487 let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23488 let e = _mm256_set1_ph(0.25);
23489 assert_eq_m256h(r, e);
23490 }
23491
23492 #[simd_test(enable = "avx512fp16,avx512vl")]
23493 unsafe fn test_mm256_mask_reduce_ph() {
23494 let a = _mm256_set1_ph(1.25);
23495 let src = _mm256_set1_ph(2.0);
23496 let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
23497 let e = _mm256_set_ph(
23498 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23499 );
23500 assert_eq_m256h(r, e);
23501 }
23502
23503 #[simd_test(enable = "avx512fp16,avx512vl")]
23504 unsafe fn test_mm256_maskz_reduce_ph() {
23505 let a = _mm256_set1_ph(1.25);
23506 let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
23507 let e = _mm256_set_ph(
23508 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23509 );
23510 assert_eq_m256h(r, e);
23511 }
23512
23513 #[simd_test(enable = "avx512fp16")]
23514 unsafe fn test_mm512_reduce_ph() {
23515 let a = _mm512_set1_ph(1.25);
23516 let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23517 let e = _mm512_set1_ph(0.25);
23518 assert_eq_m512h(r, e);
23519 }
23520
23521 #[simd_test(enable = "avx512fp16")]
23522 unsafe fn test_mm512_mask_reduce_ph() {
23523 let a = _mm512_set1_ph(1.25);
23524 let src = _mm512_set1_ph(2.0);
23525 let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
23526 src,
23527 0b01010101010101010101010101010101,
23528 a,
23529 );
23530 let e = _mm512_set_ph(
23531 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23532 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23533 );
23534 assert_eq_m512h(r, e);
23535 }
23536
23537 #[simd_test(enable = "avx512fp16")]
23538 unsafe fn test_mm512_maskz_reduce_ph() {
23539 let a = _mm512_set1_ph(1.25);
23540 let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
23541 0b01010101010101010101010101010101,
23542 a,
23543 );
23544 let e = _mm512_set_ph(
23545 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23546 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23547 );
23548 assert_eq_m512h(r, e);
23549 }
23550
23551 #[simd_test(enable = "avx512fp16")]
23552 unsafe fn test_mm512_reduce_round_ph() {
23553 let a = _mm512_set1_ph(1.25);
23554 let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
23555 let e = _mm512_set1_ph(0.25);
23556 assert_eq_m512h(r, e);
23557 }
23558
23559 #[simd_test(enable = "avx512fp16")]
23560 unsafe fn test_mm512_mask_reduce_round_ph() {
23561 let a = _mm512_set1_ph(1.25);
23562 let src = _mm512_set1_ph(2.0);
23563 let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23564 src,
23565 0b01010101010101010101010101010101,
23566 a,
23567 );
23568 let e = _mm512_set_ph(
23569 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23570 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23571 );
23572 assert_eq_m512h(r, e);
23573 }
23574
23575 #[simd_test(enable = "avx512fp16")]
23576 unsafe fn test_mm512_maskz_reduce_round_ph() {
23577 let a = _mm512_set1_ph(1.25);
23578 let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23579 0b01010101010101010101010101010101,
23580 a,
23581 );
23582 let e = _mm512_set_ph(
23583 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23584 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23585 );
23586 assert_eq_m512h(r, e);
23587 }
23588
23589 #[simd_test(enable = "avx512fp16")]
23590 unsafe fn test_mm_reduce_sh() {
23591 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23592 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23593 let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
23594 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23595 assert_eq_m128h(r, e);
23596 }
23597
23598 #[simd_test(enable = "avx512fp16")]
23599 unsafe fn test_mm_mask_reduce_sh() {
23600 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23601 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23602 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23603 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
23604 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23605 assert_eq_m128h(r, e);
23606 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
23607 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23608 assert_eq_m128h(r, e);
23609 }
23610
23611 #[simd_test(enable = "avx512fp16")]
23612 unsafe fn test_mm_maskz_reduce_sh() {
23613 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23614 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23615 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
23616 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23617 assert_eq_m128h(r, e);
23618 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
23619 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23620 assert_eq_m128h(r, e);
23621 }
23622
23623 #[simd_test(enable = "avx512fp16")]
23624 unsafe fn test_mm_reduce_round_sh() {
23625 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23626 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23627 let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
23628 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23629 assert_eq_m128h(r, e);
23630 }
23631
23632 #[simd_test(enable = "avx512fp16")]
23633 unsafe fn test_mm_mask_reduce_round_sh() {
23634 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23635 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23636 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23637 let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23638 src, 0, a, b,
23639 );
23640 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23641 assert_eq_m128h(r, e);
23642 let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23643 src, 1, a, b,
23644 );
23645 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23646 assert_eq_m128h(r, e);
23647 }
23648
23649 #[simd_test(enable = "avx512fp16")]
23650 unsafe fn test_mm_maskz_reduce_round_sh() {
23651 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23652 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23653 let r =
23654 _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
23655 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23656 assert_eq_m128h(r, e);
23657 let r =
23658 _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
23659 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23660 assert_eq_m128h(r, e);
23661 }
23662
23663 #[simd_test(enable = "avx512fp16,avx512vl")]
23664 unsafe fn test_mm_reduce_add_ph() {
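        // reduce_add/mul/max/min are horizontal reductions: they fold all lanes into a single
        // f16 scalar (here 8 lanes of 2.0 sum to 16.0).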
23665 let a = _mm_set1_ph(2.0);
23666 let r = _mm_reduce_add_ph(a);
23667 assert_eq!(r, 16.0);
23668 }
23669
23670 #[simd_test(enable = "avx512fp16,avx512vl")]
23671 unsafe fn test_mm256_reduce_add_ph() {
23672 let a = _mm256_set1_ph(2.0);
23673 let r = _mm256_reduce_add_ph(a);
23674 assert_eq!(r, 32.0);
23675 }
23676
23677 #[simd_test(enable = "avx512fp16")]
23678 unsafe fn test_mm512_reduce_add_ph() {
23679 let a = _mm512_set1_ph(2.0);
23680 let r = _mm512_reduce_add_ph(a);
23681 assert_eq!(r, 64.0);
23682 }
23683
23684 #[simd_test(enable = "avx512fp16,avx512vl")]
23685 unsafe fn test_mm_reduce_mul_ph() {
23686 let a = _mm_set1_ph(2.0);
23687 let r = _mm_reduce_mul_ph(a);
23688 assert_eq!(r, 256.0);
23689 }
23690
23691 #[simd_test(enable = "avx512fp16,avx512vl")]
23692 unsafe fn test_mm256_reduce_mul_ph() {
23693 let a = _mm256_set1_ph(2.0);
23694 let r = _mm256_reduce_mul_ph(a);
23695 assert_eq!(r, 65536.0);
23696 }
23697
23698 #[simd_test(enable = "avx512fp16")]
23699 unsafe fn test_mm512_reduce_mul_ph() {
23700 let a = _mm512_set1_ph(2.0);
23701 let r = _mm512_reduce_mul_ph(a);
23702 assert_eq!(r, 16777216.0);
23703 }
23704
23705 #[simd_test(enable = "avx512fp16,avx512vl")]
23706 unsafe fn test_mm_reduce_max_ph() {
23707 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23708 let r = _mm_reduce_max_ph(a);
23709 assert_eq!(r, 8.0);
23710 }
23711
23712 #[simd_test(enable = "avx512fp16,avx512vl")]
23713 unsafe fn test_mm256_reduce_max_ph() {
23714 let a = _mm256_set_ph(
23715 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23716 );
23717 let r = _mm256_reduce_max_ph(a);
23718 assert_eq!(r, 16.0);
23719 }
23720
23721 #[simd_test(enable = "avx512fp16")]
23722 unsafe fn test_mm512_reduce_max_ph() {
23723 let a = _mm512_set_ph(
23724 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23725 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23726 31.0, 32.0,
23727 );
23728 let r = _mm512_reduce_max_ph(a);
23729 assert_eq!(r, 32.0);
23730 }
23731
23732 #[simd_test(enable = "avx512fp16,avx512vl")]
23733 unsafe fn test_mm_reduce_min_ph() {
23734 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23735 let r = _mm_reduce_min_ph(a);
23736 assert_eq!(r, 1.0);
23737 }
23738
23739 #[simd_test(enable = "avx512fp16,avx512vl")]
23740 unsafe fn test_mm256_reduce_min_ph() {
23741 let a = _mm256_set_ph(
23742 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23743 );
23744 let r = _mm256_reduce_min_ph(a);
23745 assert_eq!(r, 1.0);
23746 }
23747
23748 #[simd_test(enable = "avx512fp16")]
23749 unsafe fn test_mm512_reduce_min_ph() {
23750 let a = _mm512_set_ph(
23751 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23752 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23753 31.0, 32.0,
23754 );
23755 let r = _mm512_reduce_min_ph(a);
23756 assert_eq!(r, 1.0);
23757 }
23758
23759 #[simd_test(enable = "avx512fp16,avx512vl")]
23760 unsafe fn test_mm_fpclass_ph_mask() {
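        // Class mask 0x18 selects positive (bit 3) and negative (bit 4) infinity.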
23761 let a = _mm_set_ph(
23762 1.,
23763 f16::INFINITY,
23764 f16::NEG_INFINITY,
23765 0.0,
23766 -0.0,
23767 -2.0,
23768 f16::NAN,
23769 5.9e-8, // Denormal
23770 );
23771 let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
23772 assert_eq!(r, 0b01100000);
23773 }
23774
23775 #[simd_test(enable = "avx512fp16,avx512vl")]
23776 unsafe fn test_mm_mask_fpclass_ph_mask() {
23777 let a = _mm_set_ph(
23778 1.,
23779 f16::INFINITY,
23780 f16::NEG_INFINITY,
23781 0.0,
23782 -0.0,
23783 -2.0,
23784 f16::NAN,
23785 5.9e-8, // Denormal
23786 );
23787 let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
23788 assert_eq!(r, 0b01000000);
23789 }
23790
23791 #[simd_test(enable = "avx512fp16,avx512vl")]
23792 unsafe fn test_mm256_fpclass_ph_mask() {
23793 let a = _mm256_set_ph(
23794 1.,
23795 f16::INFINITY,
23796 f16::NEG_INFINITY,
23797 0.0,
23798 -0.0,
23799 -2.0,
23800 f16::NAN,
23801 5.9e-8, // Denormal
23802 1.,
23803 f16::INFINITY,
23804 f16::NEG_INFINITY,
23805 0.0,
23806 -0.0,
23807 -2.0,
23808 f16::NAN,
23809 5.9e-8, // Denormal
23810 );
23811 let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
23812 assert_eq!(r, 0b0110000001100000);
23813 }
23814
23815 #[simd_test(enable = "avx512fp16,avx512vl")]
23816 unsafe fn test_mm256_mask_fpclass_ph_mask() {
23817 let a = _mm256_set_ph(
23818 1.,
23819 f16::INFINITY,
23820 f16::NEG_INFINITY,
23821 0.0,
23822 -0.0,
23823 -2.0,
23824 f16::NAN,
23825 5.9e-8, // Denormal
23826 1.,
23827 f16::INFINITY,
23828 f16::NEG_INFINITY,
23829 0.0,
23830 -0.0,
23831 -2.0,
23832 f16::NAN,
23833 5.9e-8, // Denormal
23834 );
23835 let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
23836 assert_eq!(r, 0b0100000001000000);
23837 }
23838
23839 #[simd_test(enable = "avx512fp16")]
23840 unsafe fn test_mm512_fpclass_ph_mask() {
23841 let a = _mm512_set_ph(
23842 1.,
23843 f16::INFINITY,
23844 f16::NEG_INFINITY,
23845 0.0,
23846 -0.0,
23847 -2.0,
23848 f16::NAN,
23849 5.9e-8, // Denormal
23850 1.,
23851 f16::INFINITY,
23852 f16::NEG_INFINITY,
23853 0.0,
23854 -0.0,
23855 -2.0,
23856 f16::NAN,
23857 5.9e-8, // Denormal
23858 1.,
23859 f16::INFINITY,
23860 f16::NEG_INFINITY,
23861 0.0,
23862 -0.0,
23863 -2.0,
23864 f16::NAN,
23865 5.9e-8, // Denormal
23866 1.,
23867 f16::INFINITY,
23868 f16::NEG_INFINITY,
23869 0.0,
23870 -0.0,
23871 -2.0,
23872 f16::NAN,
23873 5.9e-8, // Denormal
23874 );
23875 let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
23876 assert_eq!(r, 0b01100000011000000110000001100000);
23877 }
23878
23879 #[simd_test(enable = "avx512fp16")]
23880 unsafe fn test_mm512_mask_fpclass_ph_mask() {
23881 let a = _mm512_set_ph(
23882 1.,
23883 f16::INFINITY,
23884 f16::NEG_INFINITY,
23885 0.0,
23886 -0.0,
23887 -2.0,
23888 f16::NAN,
23889 5.9e-8, // Denormal
23890 1.,
23891 f16::INFINITY,
23892 f16::NEG_INFINITY,
23893 0.0,
23894 -0.0,
23895 -2.0,
23896 f16::NAN,
23897 5.9e-8, // Denormal
23898 1.,
23899 f16::INFINITY,
23900 f16::NEG_INFINITY,
23901 0.0,
23902 -0.0,
23903 -2.0,
23904 f16::NAN,
23905 5.9e-8, // Denormal
23906 1.,
23907 f16::INFINITY,
23908 f16::NEG_INFINITY,
23909 0.0,
23910 -0.0,
23911 -2.0,
23912 f16::NAN,
23913 5.9e-8, // Denormal
23914 );
23915 let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
23916 assert_eq!(r, 0b01000000010000000100000001000000);
23917 }
23918
23919 #[simd_test(enable = "avx512fp16")]
23920 unsafe fn test_mm_fpclass_sh_mask() {
23921 let a = _mm_set_sh(f16::INFINITY);
23922 let r = _mm_fpclass_sh_mask::<0x18>(a);
23923 assert_eq!(r, 1);
23924 }
23925
23926 #[simd_test(enable = "avx512fp16")]
23927 unsafe fn test_mm_mask_fpclass_sh_mask() {
23928 let a = _mm_set_sh(f16::INFINITY);
23929 let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
23930 assert_eq!(r, 0);
23931 let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
23932 assert_eq!(r, 1);
23933 }
23934
23935 #[simd_test(enable = "avx512fp16,avx512vl")]
23936 unsafe fn test_mm_mask_blend_ph() {
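        // mask_blend copies the lane from b where the mask bit is set and from a otherwise.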
23937 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23938 let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
23939 let r = _mm_mask_blend_ph(0b01010101, a, b);
23940 let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
23941 assert_eq_m128h(r, e);
23942 }
23943
23944 #[simd_test(enable = "avx512fp16,avx512vl")]
23945 unsafe fn test_mm256_mask_blend_ph() {
23946 let a = _mm256_set_ph(
23947 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23948 );
23949 let b = _mm256_set_ph(
23950 -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
23951 -14.0, -15.0, -16.0,
23952 );
23953 let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
23954 let e = _mm256_set_ph(
23955 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
23956 -16.0,
23957 );
23958 assert_eq_m256h(r, e);
23959 }
23960
23961 #[simd_test(enable = "avx512fp16")]
23962 unsafe fn test_mm512_mask_blend_ph() {
23963 let a = _mm512_set_ph(
23964 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23965 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23966 31.0, 32.0,
23967 );
23968 let b = _mm512_set_ph(
23969 -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
23970 -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
23971 -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
23972 );
23973 let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
23974 let e = _mm512_set_ph(
23975 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
23976 -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
23977 29.0, -30.0, 31.0, -32.0,
23978 );
23979 assert_eq_m512h(r, e);
23980 }
23981
23982 #[simd_test(enable = "avx512fp16,avx512vl")]
23983 unsafe fn test_mm_permutex2var_ph() {
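        // permutex2var gathers lanes from the concatenation of a and b using the indices in
        // idx, so the even indices 0..14 select every other lane of the combined table.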
23984 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23985 let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
23986 let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
23987 let r = _mm_permutex2var_ph(a, idx, b);
23988 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
23989 assert_eq_m128h(r, e);
23990 }
23991
23992 #[simd_test(enable = "avx512fp16,avx512vl")]
23993 unsafe fn test_mm256_permutex2var_ph() {
23994 let a = _mm256_setr_ph(
23995 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23996 );
23997 let b = _mm256_setr_ph(
23998 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23999 31.0, 32.0,
24000 );
24001 let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
24002 let r = _mm256_permutex2var_ph(a, idx, b);
24003 let e = _mm256_setr_ph(
24004 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24005 31.0,
24006 );
24007 assert_eq_m256h(r, e);
24008 }
24009
24010 #[simd_test(enable = "avx512fp16")]
24011 unsafe fn test_mm512_permutex2var_ph() {
24012 let a = _mm512_setr_ph(
24013 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24014 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24015 31.0, 32.0,
24016 );
24017 let b = _mm512_setr_ph(
24018 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
24019 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
24020 61.0, 62.0, 63.0, 64.0,
24021 );
24022 let idx = _mm512_set_epi16(
24023 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
24024 18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
24025 );
24026 let r = _mm512_permutex2var_ph(a, idx, b);
24027 let e = _mm512_setr_ph(
24028 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24029 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
24030 59.0, 61.0, 63.0,
24031 );
24032 assert_eq_m512h(r, e);
24033 }
24034
24035 #[simd_test(enable = "avx512fp16,avx512vl")]
24036 unsafe fn test_mm_permutexvar_ph() {
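        // permutexvar shuffles the lanes of a according to the indices in idx.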
24037 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24038 let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
24039 let r = _mm_permutexvar_ph(idx, a);
24040 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
24041 assert_eq_m128h(r, e);
24042 }
24043
24044 #[simd_test(enable = "avx512fp16,avx512vl")]
24045 unsafe fn test_mm256_permutexvar_ph() {
24046 let a = _mm256_set_ph(
24047 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24048 );
24049 let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
24050 let r = _mm256_permutexvar_ph(idx, a);
24051 let e = _mm256_setr_ph(
24052 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
24053 );
24054 assert_eq_m256h(r, e);
24055 }
24056
24057 #[simd_test(enable = "avx512fp16")]
24058 unsafe fn test_mm512_permutexvar_ph() {
24059 let a = _mm512_set_ph(
24060 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24061 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24062 31.0, 32.0,
24063 );
24064 let idx = _mm512_set_epi16(
24065 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
24066 17, 19, 21, 23, 25, 27, 29, 31,
24067 );
24068 let r = _mm512_permutexvar_ph(idx, a);
24069 let e = _mm512_setr_ph(
24070 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24071 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
24072 30.0, 32.0,
24073 );
24074 assert_eq_m512h(r, e);
24075 }
24076
24077 #[simd_test(enable = "avx512fp16,avx512vl")]
24078 unsafe fn test_mm_cvtepi16_ph() {
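        // Converts each packed signed 16-bit integer to a half-precision float.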
24079 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24080 let r = _mm_cvtepi16_ph(a);
24081 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24082 assert_eq_m128h(r, e);
24083 }
24084
24085 #[simd_test(enable = "avx512fp16,avx512vl")]
24086 unsafe fn test_mm_mask_cvtepi16_ph() {
24087 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24088 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24089 let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a);
24090 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24091 assert_eq_m128h(r, e);
24092 }
24093
24094 #[simd_test(enable = "avx512fp16,avx512vl")]
24095 unsafe fn test_mm_maskz_cvtepi16_ph() {
24096 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24097 let r = _mm_maskz_cvtepi16_ph(0b01010101, a);
24098 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24099 assert_eq_m128h(r, e);
24100 }
24101
24102 #[simd_test(enable = "avx512fp16,avx512vl")]
24103 unsafe fn test_mm256_cvtepi16_ph() {
24104 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24105 let r = _mm256_cvtepi16_ph(a);
24106 let e = _mm256_set_ph(
24107 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24108 );
24109 assert_eq_m256h(r, e);
24110 }
24111
24112 #[simd_test(enable = "avx512fp16,avx512vl")]
24113 unsafe fn test_mm256_mask_cvtepi16_ph() {
24114 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24115 let src = _mm256_set_ph(
24116 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24117 );
24118 let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a);
24119 let e = _mm256_set_ph(
24120 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24121 );
24122 assert_eq_m256h(r, e);
24123 }
24124
24125 #[simd_test(enable = "avx512fp16,avx512vl")]
24126 unsafe fn test_mm256_maskz_cvtepi16_ph() {
24127 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24128 let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a);
24129 let e = _mm256_set_ph(
24130 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
24131 );
24132 assert_eq_m256h(r, e);
24133 }
24134
24135 #[simd_test(enable = "avx512fp16")]
24136 unsafe fn test_mm512_cvtepi16_ph() {
24137 let a = _mm512_set_epi16(
24138 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24139 25, 26, 27, 28, 29, 30, 31, 32,
24140 );
24141 let r = _mm512_cvtepi16_ph(a);
24142 let e = _mm512_set_ph(
24143 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24144 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24145 31.0, 32.0,
24146 );
24147 assert_eq_m512h(r, e);
24148 }
24149
24150 #[simd_test(enable = "avx512fp16")]
24151 unsafe fn test_mm512_mask_cvtepi16_ph() {
24152 let a = _mm512_set_epi16(
24153 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24154 25, 26, 27, 28, 29, 30, 31, 32,
24155 );
24156 let src = _mm512_set_ph(
24157 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24158 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24159 );
24160 let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a);
24161 let e = _mm512_set_ph(
24162 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24163 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24164 );
24165 assert_eq_m512h(r, e);
24166 }
24167
24168 #[simd_test(enable = "avx512fp16")]
24169 unsafe fn test_mm512_maskz_cvtepi16_ph() {
24170 let a = _mm512_set_epi16(
24171 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24172 25, 26, 27, 28, 29, 30, 31, 32,
24173 );
24174 let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a);
24175 let e = _mm512_set_ph(
24176 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24177 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24178 );
24179 assert_eq_m512h(r, e);
24180 }
24181
24182 #[simd_test(enable = "avx512fp16")]
24183 unsafe fn test_mm512_cvt_roundepi16_ph() {
24184 let a = _mm512_set_epi16(
24185 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24186 25, 26, 27, 28, 29, 30, 31, 32,
24187 );
24188 let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24189 let e = _mm512_set_ph(
24190 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24191 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24192 31.0, 32.0,
24193 );
24194 assert_eq_m512h(r, e);
24195 }
24196
24197 #[simd_test(enable = "avx512fp16")]
24198 unsafe fn test_mm512_mask_cvt_roundepi16_ph() {
24199 let a = _mm512_set_epi16(
24200 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24201 25, 26, 27, 28, 29, 30, 31, 32,
24202 );
24203 let src = _mm512_set_ph(
24204 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24205 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24206 );
24207 let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24208 src,
24209 0b01010101010101010101010101010101,
24210 a,
24211 );
24212 let e = _mm512_set_ph(
24213 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24214 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24215 );
24216 assert_eq_m512h(r, e);
24217 }
24218
24219 #[simd_test(enable = "avx512fp16")]
24220 unsafe fn test_mm512_maskz_cvt_roundepi16_ph() {
24221 let a = _mm512_set_epi16(
24222 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24223 25, 26, 27, 28, 29, 30, 31, 32,
24224 );
24225 let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24226 0b01010101010101010101010101010101,
24227 a,
24228 );
24229 let e = _mm512_set_ph(
24230 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24231 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24232 );
24233 assert_eq_m512h(r, e);
24234 }
24235
24236 #[simd_test(enable = "avx512fp16,avx512vl")]
24237 unsafe fn test_mm_cvtepu16_ph() {
24238 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24239 let r = _mm_cvtepu16_ph(a);
24240 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24241 assert_eq_m128h(r, e);
24242 }
24243
24244 #[simd_test(enable = "avx512fp16,avx512vl")]
24245 unsafe fn test_mm_mask_cvtepu16_ph() {
24246 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24247 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24248 let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a);
24249 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24250 assert_eq_m128h(r, e);
24251 }
24252
24253 #[simd_test(enable = "avx512fp16,avx512vl")]
24254 unsafe fn test_mm_maskz_cvtepu16_ph() {
24255 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24256 let r = _mm_maskz_cvtepu16_ph(0b01010101, a);
24257 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24258 assert_eq_m128h(r, e);
24259 }
24260
24261 #[simd_test(enable = "avx512fp16,avx512vl")]
24262 unsafe fn test_mm256_cvtepu16_ph() {
24263 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24264 let r = _mm256_cvtepu16_ph(a);
24265 let e = _mm256_set_ph(
24266 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24267 );
24268 assert_eq_m256h(r, e);
24269 }
24270
24271 #[simd_test(enable = "avx512fp16,avx512vl")]
24272 unsafe fn test_mm256_mask_cvtepu16_ph() {
24273 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24274 let src = _mm256_set_ph(
24275 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24276 );
24277 let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a);
24278 let e = _mm256_set_ph(
24279 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24280 );
24281 assert_eq_m256h(r, e);
24282 }
24283
24284 #[simd_test(enable = "avx512fp16,avx512vl")]
24285 unsafe fn test_mm256_maskz_cvtepu16_ph() {
24286 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24287 let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a);
24288 let e = _mm256_set_ph(
24289 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
24290 );
24291 assert_eq_m256h(r, e);
24292 }
24293
24294 #[simd_test(enable = "avx512fp16")]
24295 unsafe fn test_mm512_cvtepu16_ph() {
24296 let a = _mm512_set_epi16(
24297 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24298 25, 26, 27, 28, 29, 30, 31, 32,
24299 );
24300 let r = _mm512_cvtepu16_ph(a);
24301 let e = _mm512_set_ph(
24302 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24303 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24304 31.0, 32.0,
24305 );
24306 assert_eq_m512h(r, e);
24307 }
24308
24309 #[simd_test(enable = "avx512fp16")]
24310 unsafe fn test_mm512_mask_cvtepu16_ph() {
24311 let a = _mm512_set_epi16(
24312 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24313 25, 26, 27, 28, 29, 30, 31, 32,
24314 );
24315 let src = _mm512_set_ph(
24316 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24317 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24318 );
24319 let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a);
24320 let e = _mm512_set_ph(
24321 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24322 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24323 );
24324 assert_eq_m512h(r, e);
24325 }
24326
24327 #[simd_test(enable = "avx512fp16")]
24328 unsafe fn test_mm512_maskz_cvtepu16_ph() {
24329 let a = _mm512_set_epi16(
24330 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24331 25, 26, 27, 28, 29, 30, 31, 32,
24332 );
24333 let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a);
24334 let e = _mm512_set_ph(
24335 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24336 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24337 );
24338 assert_eq_m512h(r, e);
24339 }
24340
24341 #[simd_test(enable = "avx512fp16")]
24342 unsafe fn test_mm512_cvt_roundepu16_ph() {
24343 let a = _mm512_set_epi16(
24344 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24345 25, 26, 27, 28, 29, 30, 31, 32,
24346 );
24347 let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24348 let e = _mm512_set_ph(
24349 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24350 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24351 31.0, 32.0,
24352 );
24353 assert_eq_m512h(r, e);
24354 }
24355
24356 #[simd_test(enable = "avx512fp16")]
24357 unsafe fn test_mm512_mask_cvt_roundepu16_ph() {
24358 let a = _mm512_set_epi16(
24359 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24360 25, 26, 27, 28, 29, 30, 31, 32,
24361 );
24362 let src = _mm512_set_ph(
24363 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24364 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24365 );
24366 let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24367 src,
24368 0b01010101010101010101010101010101,
24369 a,
24370 );
24371 let e = _mm512_set_ph(
24372 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24373 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24374 );
24375 assert_eq_m512h(r, e);
24376 }
24377
24378 #[simd_test(enable = "avx512fp16")]
24379 unsafe fn test_mm512_maskz_cvt_roundepu16_ph() {
24380 let a = _mm512_set_epi16(
24381 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24382 25, 26, 27, 28, 29, 30, 31, 32,
24383 );
24384 let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24385 0b01010101010101010101010101010101,
24386 a,
24387 );
24388 let e = _mm512_set_ph(
24389 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24390 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24391 );
24392 assert_eq_m512h(r, e);
24393 }
24394
24395 #[simd_test(enable = "avx512fp16,avx512vl")]
24396 unsafe fn test_mm_cvtepi32_ph() {
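        // The four 32-bit integers convert into the lower four f16 lanes; the upper lanes of
        // the 128-bit result are zeroed.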
24397 let a = _mm_set_epi32(1, 2, 3, 4);
24398 let r = _mm_cvtepi32_ph(a);
24399 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24400 assert_eq_m128h(r, e);
24401 }
24402
24403 #[simd_test(enable = "avx512fp16,avx512vl")]
24404 unsafe fn test_mm_mask_cvtepi32_ph() {
24405 let a = _mm_set_epi32(1, 2, 3, 4);
24406 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24407 let r = _mm_mask_cvtepi32_ph(src, 0b0101, a);
24408 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
24409 assert_eq_m128h(r, e);
24410 }
24411
24412 #[simd_test(enable = "avx512fp16,avx512vl")]
24413 unsafe fn test_mm_maskz_cvtepi32_ph() {
24414 let a = _mm_set_epi32(1, 2, 3, 4);
24415 let r = _mm_maskz_cvtepi32_ph(0b0101, a);
24416 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
24417 assert_eq_m128h(r, e);
24418 }
24419
24420 #[simd_test(enable = "avx512fp16,avx512vl")]
24421 unsafe fn test_mm256_cvtepi32_ph() {
24422 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24423 let r = _mm256_cvtepi32_ph(a);
24424 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24425 assert_eq_m128h(r, e);
24426 }
24427
24428 #[simd_test(enable = "avx512fp16,avx512vl")]
24429 unsafe fn test_mm256_mask_cvtepi32_ph() {
24430 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24431 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24432 let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a);
24433 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24434 assert_eq_m128h(r, e);
24435 }
24436
24437 #[simd_test(enable = "avx512fp16,avx512vl")]
24438 unsafe fn test_mm256_maskz_cvtepi32_ph() {
24439 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24440 let r = _mm256_maskz_cvtepi32_ph(0b01010101, a);
24441 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24442 assert_eq_m128h(r, e);
24443 }
24444
24445 #[simd_test(enable = "avx512fp16")]
24446 unsafe fn test_mm512_cvtepi32_ph() {
24447 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24448 let r = _mm512_cvtepi32_ph(a);
24449 let e = _mm256_set_ph(
24450 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24451 );
24452 assert_eq_m256h(r, e);
24453 }
24454
24455 #[simd_test(enable = "avx512fp16")]
24456 unsafe fn test_mm512_mask_cvtepi32_ph() {
24457 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24458 let src = _mm256_set_ph(
24459 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24460 );
24461 let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a);
24462 let e = _mm256_set_ph(
24463 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24464 );
24465 assert_eq_m256h(r, e);
24466 }
24467
24468 #[simd_test(enable = "avx512fp16")]
24469 unsafe fn test_mm512_maskz_cvtepi32_ph() {
24470 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24471 let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
24472 let e = _mm256_set_ph(
24473 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24474 );
24475 assert_eq_m256h(r, e);
24476 }
24477
24478 #[simd_test(enable = "avx512fp16")]
24479 unsafe fn test_mm512_cvt_roundepi32_ph() {
24480 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24481 let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24482 let e = _mm256_set_ph(
24483 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24484 );
24485 assert_eq_m256h(r, e);
24486 }
24487
24488 #[simd_test(enable = "avx512fp16")]
24489 unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
24490 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24491 let src = _mm256_set_ph(
24492 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24493 );
24494 let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24495 src,
24496 0b0101010101010101,
24497 a,
24498 );
24499 let e = _mm256_set_ph(
24500 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24501 );
24502 assert_eq_m256h(r, e);
24503 }
24504
24505 #[simd_test(enable = "avx512fp16")]
24506 unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
24507 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24508 let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24509 0b0101010101010101,
24510 a,
24511 );
24512 let e = _mm256_set_ph(
24513 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24514 );
24515 assert_eq_m256h(r, e);
24516 }
24517
24518 #[simd_test(enable = "avx512fp16")]
24519 unsafe fn test_mm_cvti32_sh() {
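        // Converts the integer argument into the lowest lane and copies the remaining lanes
        // from a.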
24520 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24521 let r = _mm_cvti32_sh(a, 10);
24522 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24523 assert_eq_m128h(r, e);
24524 }
24525
24526 #[simd_test(enable = "avx512fp16")]
24527 unsafe fn test_mm_cvt_roundi32_sh() {
24528 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24529 let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
24530 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24531 assert_eq_m128h(r, e);
24532 }
24533
24534 #[simd_test(enable = "avx512fp16,avx512vl")]
24535 unsafe fn test_mm_cvtepu32_ph() {
24536 let a = _mm_set_epi32(1, 2, 3, 4);
24537 let r = _mm_cvtepu32_ph(a);
24538 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24539 assert_eq_m128h(r, e);
24540 }
24541
24542 #[simd_test(enable = "avx512fp16,avx512vl")]
24543 unsafe fn test_mm_mask_cvtepu32_ph() {
24544 let a = _mm_set_epi32(1, 2, 3, 4);
24545 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24546 let r = _mm_mask_cvtepu32_ph(src, 0b0101, a);
24547 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
24548 assert_eq_m128h(r, e);
24549 }
24550
24551 #[simd_test(enable = "avx512fp16,avx512vl")]
24552 unsafe fn test_mm_maskz_cvtepu32_ph() {
24553 let a = _mm_set_epi32(1, 2, 3, 4);
24554 let r = _mm_maskz_cvtepu32_ph(0b0101, a);
24555 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
24556 assert_eq_m128h(r, e);
24557 }
24558
24559 #[simd_test(enable = "avx512fp16,avx512vl")]
24560 unsafe fn test_mm256_cvtepu32_ph() {
24561 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24562 let r = _mm256_cvtepu32_ph(a);
24563 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24564 assert_eq_m128h(r, e);
24565 }
24566
24567 #[simd_test(enable = "avx512fp16,avx512vl")]
24568 unsafe fn test_mm256_mask_cvtepu32_ph() {
24569 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24570 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24571 let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a);
24572 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24573 assert_eq_m128h(r, e);
24574 }
24575
24576 #[simd_test(enable = "avx512fp16,avx512vl")]
24577 unsafe fn test_mm256_maskz_cvtepu32_ph() {
24578 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24579 let r = _mm256_maskz_cvtepu32_ph(0b01010101, a);
24580 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24581 assert_eq_m128h(r, e);
24582 }
24583
24584 #[simd_test(enable = "avx512fp16")]
24585 unsafe fn test_mm512_cvtepu32_ph() {
24586 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24587 let r = _mm512_cvtepu32_ph(a);
24588 let e = _mm256_set_ph(
24589 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24590 );
24591 assert_eq_m256h(r, e);
24592 }
24593
24594 #[simd_test(enable = "avx512fp16")]
24595 unsafe fn test_mm512_mask_cvtepu32_ph() {
24596 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24597 let src = _mm256_set_ph(
24598 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24599 );
24600 let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a);
24601 let e = _mm256_set_ph(
24602 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
24603 );
24604 assert_eq_m256h(r, e);
24605 }
24606
24607 #[simd_test(enable = "avx512fp16")]
24608 unsafe fn test_mm512_maskz_cvtepu32_ph() {
24609 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24610 let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
24611 let e = _mm256_set_ph(
24612 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24613 );
24614 assert_eq_m256h(r, e);
24615 }
24616
24617 #[simd_test(enable = "avx512fp16")]
24618 unsafe fn test_mm512_cvt_roundepu32_ph() {
24619 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24620 let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24621 let e = _mm256_set_ph(
24622 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24623 );
24624 assert_eq_m256h(r, e);
24625 }
24626
24627 #[simd_test(enable = "avx512fp16")]
24628 unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
24629 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24630 let src = _mm256_set_ph(
24631 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24632 );
24633 let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24634 src,
24635 0b0101010101010101,
24636 a,
24637 );
24638 let e = _mm256_set_ph(
24639 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
24640 16.0,
24641 );
24642 assert_eq_m256h(r, e);
24643 }
24644
24645 #[simd_test(enable = "avx512fp16")]
24646 unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
24647 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24648 let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24649 0b0101010101010101,
24650 a,
24651 );
24652 let e = _mm256_set_ph(
24653 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24654 );
24655 assert_eq_m256h(r, e);
24656 }
24657
24658 #[simd_test(enable = "avx512fp16")]
24659 unsafe fn test_mm_cvtu32_sh() {
24660 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24661 let r = _mm_cvtu32_sh(a, 10);
24662 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24663 assert_eq_m128h(r, e);
24664 }
24665
24666 #[simd_test(enable = "avx512fp16")]
24667 unsafe fn test_mm_cvt_roundu32_sh() {
24668 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24669 let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
24670 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24671 assert_eq_m128h(r, e);
24672 }
24673
24674 #[simd_test(enable = "avx512fp16,avx512vl")]
24675 unsafe fn test_mm_cvtepi64_ph() {
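        // The two 64-bit integers convert into the two lowest f16 lanes; the rest of the
        // 128-bit result is zeroed.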
24676 let a = _mm_set_epi64x(1, 2);
24677 let r = _mm_cvtepi64_ph(a);
24678 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
24679 assert_eq_m128h(r, e);
24680 }
24681
24682 #[simd_test(enable = "avx512fp16,avx512vl")]
24683 unsafe fn test_mm_mask_cvtepi64_ph() {
24684 let a = _mm_set_epi64x(1, 2);
24685 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24686 let r = _mm_mask_cvtepi64_ph(src, 0b01, a);
24687 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
24688 assert_eq_m128h(r, e);
24689 }
24690
24691 #[simd_test(enable = "avx512fp16,avx512vl")]
24692 unsafe fn test_mm_maskz_cvtepi64_ph() {
24693 let a = _mm_set_epi64x(1, 2);
24694 let r = _mm_maskz_cvtepi64_ph(0b01, a);
24695 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.);
24696 assert_eq_m128h(r, e);
24697 }
24698
24699 #[simd_test(enable = "avx512fp16,avx512vl")]
24700 unsafe fn test_mm256_cvtepi64_ph() {
24701 let a = _mm256_set_epi64x(1, 2, 3, 4);
24702 let r = _mm256_cvtepi64_ph(a);
24703 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24704 assert_eq_m128h(r, e);
24705 }
24706
24707 #[simd_test(enable = "avx512fp16,avx512vl")]
24708 unsafe fn test_mm256_mask_cvtepi64_ph() {
24709 let a = _mm256_set_epi64x(1, 2, 3, 4);
24710 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24711 let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a);
24712 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
24713 assert_eq_m128h(r, e);
24714 }
24715
24716 #[simd_test(enable = "avx512fp16,avx512vl")]
24717 unsafe fn test_mm256_maskz_cvtepi64_ph() {
24718 let a = _mm256_set_epi64x(1, 2, 3, 4);
24719 let r = _mm256_maskz_cvtepi64_ph(0b0101, a);
24720 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
24721 assert_eq_m128h(r, e);
24722 }
24723
24724 #[simd_test(enable = "avx512fp16")]
24725 unsafe fn test_mm512_cvtepi64_ph() {
24726 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24727 let r = _mm512_cvtepi64_ph(a);
24728 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24729 assert_eq_m128h(r, e);
24730 }
24731
24732 #[simd_test(enable = "avx512fp16")]
24733 unsafe fn test_mm512_mask_cvtepi64_ph() {
24734 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24735 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24736 let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a);
24737 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24738 assert_eq_m128h(r, e);
24739 }
24740
24741 #[simd_test(enable = "avx512fp16")]
24742 unsafe fn test_mm512_maskz_cvtepi64_ph() {
24743 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24744 let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
24745 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24746 assert_eq_m128h(r, e);
24747 }
24748
24749 #[simd_test(enable = "avx512fp16")]
24750 unsafe fn test_mm512_cvt_roundepi64_ph() {
24751 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24752 let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24753 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24754 assert_eq_m128h(r, e);
24755 }
24756
24757 #[simd_test(enable = "avx512fp16")]
24758 unsafe fn test_mm512_mask_cvt_roundepi64_ph() {
24759 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24760 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24761 let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24762 src, 0b01010101, a,
24763 );
24764 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24765 assert_eq_m128h(r, e);
24766 }
24767
24768 #[simd_test(enable = "avx512fp16")]
24769 unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
24770 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24771 let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24772 0b01010101, a,
24773 );
24774 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24775 assert_eq_m128h(r, e);
24776 }
24777
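    // The unsigned 64-bit variants follow the same narrowing pattern; the test values are small
    // enough to be exactly representable in half precision.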
24778 #[simd_test(enable = "avx512fp16,avx512vl")]
24779 unsafe fn test_mm_cvtepu64_ph() {
24780 let a = _mm_set_epi64x(1, 2);
24781 let r = _mm_cvtepu64_ph(a);
24782 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
24783 assert_eq_m128h(r, e);
24784 }
24785
24786 #[simd_test(enable = "avx512fp16,avx512vl")]
24787 unsafe fn test_mm_mask_cvtepu64_ph() {
24788 let a = _mm_set_epi64x(1, 2);
24789 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24790 let r = _mm_mask_cvtepu64_ph(src, 0b01, a);
24791 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
24792 assert_eq_m128h(r, e);
24793 }
24794
24795 #[simd_test(enable = "avx512fp16,avx512vl")]
24796 unsafe fn test_mm_maskz_cvtepu64_ph() {
24797 let a = _mm_set_epi64x(1, 2);
24798 let r = _mm_maskz_cvtepu64_ph(0b01, a);
24799 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
24800 assert_eq_m128h(r, e);
24801 }
24802
24803 #[simd_test(enable = "avx512fp16,avx512vl")]
24804 unsafe fn test_mm256_cvtepu64_ph() {
24805 let a = _mm256_set_epi64x(1, 2, 3, 4);
24806 let r = _mm256_cvtepu64_ph(a);
24807 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24808 assert_eq_m128h(r, e);
24809 }
24810
24811 #[simd_test(enable = "avx512fp16,avx512vl")]
24812 unsafe fn test_mm256_mask_cvtepu64_ph() {
24813 let a = _mm256_set_epi64x(1, 2, 3, 4);
24814 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24815 let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a);
24816 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
24817 assert_eq_m128h(r, e);
24818 }
24819
24820 #[simd_test(enable = "avx512fp16,avx512vl")]
24821 unsafe fn test_mm256_maskz_cvtepu64_ph() {
24822 let a = _mm256_set_epi64x(1, 2, 3, 4);
24823 let r = _mm256_maskz_cvtepu64_ph(0b0101, a);
24824 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
24825 assert_eq_m128h(r, e);
24826 }
24827
24828 #[simd_test(enable = "avx512fp16")]
24829 unsafe fn test_mm512_cvtepu64_ph() {
24830 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24831 let r = _mm512_cvtepu64_ph(a);
24832 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24833 assert_eq_m128h(r, e);
24834 }
24835
24836 #[simd_test(enable = "avx512fp16")]
24837 unsafe fn test_mm512_mask_cvtepu64_ph() {
24838 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24839 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24840 let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a);
24841 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24842 assert_eq_m128h(r, e);
24843 }
24844
24845 #[simd_test(enable = "avx512fp16")]
24846 unsafe fn test_mm512_maskz_cvtepu64_ph() {
24847 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24848 let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
24849 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24850 assert_eq_m128h(r, e);
24851 }
24852
24853 #[simd_test(enable = "avx512fp16")]
24854 unsafe fn test_mm512_cvt_roundepu64_ph() {
24855 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24856 let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24857 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24858 assert_eq_m128h(r, e);
24859 }
24860
24861 #[simd_test(enable = "avx512fp16")]
24862 unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
24863 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24864 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24865 let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24866 src, 0b01010101, a,
24867 );
24868 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24869 assert_eq_m128h(r, e);
24870 }
24871
24872 #[simd_test(enable = "avx512fp16")]
24873 unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
24874 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24875 let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24876 0b01010101, a,
24877 );
24878 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24879 assert_eq_m128h(r, e);
24880 }
24881
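    // Packed f32 -> f16 conversions: a __m128 source fills only the low four lanes of the
    // __m128h result, and the remaining lanes are zeroed.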
24882 #[simd_test(enable = "avx512fp16,avx512vl")]
24883 unsafe fn test_mm_cvtxps_ph() {
24884 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
24885 let r = _mm_cvtxps_ph(a);
24886 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24887 assert_eq_m128h(r, e);
24888 }
24889
24890 #[simd_test(enable = "avx512fp16,avx512vl")]
24891 unsafe fn test_mm_mask_cvtxps_ph() {
24892 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
24893 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24894 let r = _mm_mask_cvtxps_ph(src, 0b0101, a);
24895 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0);
24896 assert_eq_m128h(r, e);
24897 }
24898
24899 #[simd_test(enable = "avx512fp16,avx512vl")]
24900 unsafe fn test_mm_maskz_cvtxps_ph() {
24901 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
24902 let r = _mm_maskz_cvtxps_ph(0b0101, a);
24903 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
24904 assert_eq_m128h(r, e);
24905 }
24906
24907 #[simd_test(enable = "avx512fp16,avx512vl")]
24908 unsafe fn test_mm256_cvtxps_ph() {
24909 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24910 let r = _mm256_cvtxps_ph(a);
24911 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24912 assert_eq_m128h(r, e);
24913 }
24914
24915 #[simd_test(enable = "avx512fp16,avx512vl")]
24916 unsafe fn test_mm256_mask_cvtxps_ph() {
24917 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24918 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24919 let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a);
24920 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24921 assert_eq_m128h(r, e);
24922 }
24923
24924 #[simd_test(enable = "avx512fp16,avx512vl")]
24925 unsafe fn test_mm256_maskz_cvtxps_ph() {
24926 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24927 let r = _mm256_maskz_cvtxps_ph(0b01010101, a);
24928 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24929 assert_eq_m128h(r, e);
24930 }
24931
24932 #[simd_test(enable = "avx512fp16")]
24933 unsafe fn test_mm512_cvtxps_ph() {
24934 let a = _mm512_set_ps(
24935 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24936 );
24937 let r = _mm512_cvtxps_ph(a);
24938 let e = _mm256_set_ph(
24939 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24940 );
24941 assert_eq_m256h(r, e);
24942 }
24943
24944 #[simd_test(enable = "avx512fp16")]
24945 unsafe fn test_mm512_mask_cvtxps_ph() {
24946 let a = _mm512_set_ps(
24947 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24948 );
24949 let src = _mm256_set_ph(
24950 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24951 );
24952 let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a);
24953 let e = _mm256_set_ph(
24954 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
24955 );
24956 assert_eq_m256h(r, e);
24957 }
24958
24959 #[simd_test(enable = "avx512fp16")]
24960 unsafe fn test_mm512_maskz_cvtxps_ph() {
24961 let a = _mm512_set_ps(
24962 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24963 );
24964 let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a);
24965 let e = _mm256_set_ph(
24966 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24967 );
24968 assert_eq_m256h(r, e);
24969 }
24970
24971 #[simd_test(enable = "avx512fp16")]
24972 unsafe fn test_mm512_cvtx_roundps_ph() {
24973 let a = _mm512_set_ps(
24974 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24975 );
24976 let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24977 let e = _mm256_set_ph(
24978 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24979 );
24980 assert_eq_m256h(r, e);
24981 }
24982
24983 #[simd_test(enable = "avx512fp16")]
24984 unsafe fn test_mm512_mask_cvtx_roundps_ph() {
24985 let a = _mm512_set_ps(
24986 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24987 );
24988 let src = _mm256_set_ph(
24989 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24990 );
24991 let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24992 src,
24993 0b0101010101010101,
24994 a,
24995 );
24996 let e = _mm256_set_ph(
24997 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
24998 16.0,
24999 );
25000 assert_eq_m256h(r, e);
25001 }
25002
25003 #[simd_test(enable = "avx512fp16")]
25004 unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
25005 let a = _mm512_set_ps(
25006 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25007 );
25008 let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25009 0b0101010101010101,
25010 a,
25011 );
25012 let e = _mm256_set_ph(
25013 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
25014 );
25015 assert_eq_m256h(r, e);
25016 }
25017
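    // Scalar f32 -> f16: only the lowest lane of `a` is replaced by the converted value, the
    // upper seven lanes are copied through. A clear mask bit selects `src` (mask) or zero
    // (maskz) for that low lane instead.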
25018 #[simd_test(enable = "avx512fp16")]
25019 unsafe fn test_mm_cvtss_sh() {
25020 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25021 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25022 let r = _mm_cvtss_sh(a, b);
25023 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25024 assert_eq_m128h(r, e);
25025 }
25026
25027 #[simd_test(enable = "avx512fp16")]
25028 unsafe fn test_mm_mask_cvtss_sh() {
25029 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25030 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25031 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25032 let r = _mm_mask_cvtss_sh(src, 0, a, b);
25033 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25034 assert_eq_m128h(r, e);
25035 let r = _mm_mask_cvtss_sh(src, 1, a, b);
25036 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25037 assert_eq_m128h(r, e);
25038 }
25039
25040 #[simd_test(enable = "avx512fp16")]
25041 unsafe fn test_mm_maskz_cvtss_sh() {
25042 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25043 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25044 let r = _mm_maskz_cvtss_sh(0, a, b);
25045 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25046 assert_eq_m128h(r, e);
25047 let r = _mm_maskz_cvtss_sh(1, a, b);
25048 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25049 assert_eq_m128h(r, e);
25050 }
25051
25052 #[simd_test(enable = "avx512fp16")]
25053 unsafe fn test_mm_cvt_roundss_sh() {
25054 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25055 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25056 let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
25057 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25058 assert_eq_m128h(r, e);
25059 }
25060
25061 #[simd_test(enable = "avx512fp16")]
25062 unsafe fn test_mm_mask_cvt_roundss_sh() {
25063 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25064 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25065 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25066 let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25067 src, 0, a, b,
25068 );
25069 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25070 assert_eq_m128h(r, e);
25071 let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25072 src, 1, a, b,
25073 );
25074 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25075 assert_eq_m128h(r, e);
25076 }
25077
25078 #[simd_test(enable = "avx512fp16")]
25079 unsafe fn test_mm_maskz_cvt_roundss_sh() {
25080 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25081 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25082 let r =
25083 _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
25084 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25085 assert_eq_m128h(r, e);
25086 let r =
25087 _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
25088 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25089 assert_eq_m128h(r, e);
25090 }
25091
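    // Packed f64 -> f16 conversions narrow like the integer cases above: the results occupy the
    // low lanes of an __m128h and unused upper lanes are zeroed.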
25092 #[simd_test(enable = "avx512fp16,avx512vl")]
25093 unsafe fn test_mm_cvtpd_ph() {
25094 let a = _mm_set_pd(1.0, 2.0);
25095 let r = _mm_cvtpd_ph(a);
25096 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
25097 assert_eq_m128h(r, e);
25098 }
25099
25100 #[simd_test(enable = "avx512fp16,avx512vl")]
25101 unsafe fn test_mm_mask_cvtpd_ph() {
25102 let a = _mm_set_pd(1.0, 2.0);
25103 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25104 let r = _mm_mask_cvtpd_ph(src, 0b01, a);
25105 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
25106 assert_eq_m128h(r, e);
25107 }
25108
25109 #[simd_test(enable = "avx512fp16,avx512vl")]
25110 unsafe fn test_mm_maskz_cvtpd_ph() {
25111 let a = _mm_set_pd(1.0, 2.0);
25112 let r = _mm_maskz_cvtpd_ph(0b01, a);
25113 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
25114 assert_eq_m128h(r, e);
25115 }
25116
25117 #[simd_test(enable = "avx512fp16,avx512vl")]
25118 unsafe fn test_mm256_cvtpd_ph() {
25119 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25120 let r = _mm256_cvtpd_ph(a);
25121 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25122 assert_eq_m128h(r, e);
25123 }
25124
25125 #[simd_test(enable = "avx512fp16,avx512vl")]
25126 unsafe fn test_mm256_mask_cvtpd_ph() {
25127 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25128 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25129 let r = _mm256_mask_cvtpd_ph(src, 0b0101, a);
25130 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
25131 assert_eq_m128h(r, e);
25132 }
25133
25134 #[simd_test(enable = "avx512fp16,avx512vl")]
25135 unsafe fn test_mm256_maskz_cvtpd_ph() {
25136 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25137 let r = _mm256_maskz_cvtpd_ph(0b0101, a);
25138 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
25139 assert_eq_m128h(r, e);
25140 }
25141
25142 #[simd_test(enable = "avx512fp16")]
25143 unsafe fn test_mm512_cvtpd_ph() {
25144 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25145 let r = _mm512_cvtpd_ph(a);
25146 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25147 assert_eq_m128h(r, e);
25148 }
25149
25150 #[simd_test(enable = "avx512fp16")]
25151 unsafe fn test_mm512_mask_cvtpd_ph() {
25152 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25153 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25154 let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a);
25155 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25156 assert_eq_m128h(r, e);
25157 }
25158
25159 #[simd_test(enable = "avx512fp16")]
25160 unsafe fn test_mm512_maskz_cvtpd_ph() {
25161 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25162 let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
25163 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25164 assert_eq_m128h(r, e);
25165 }
25166
25167 #[simd_test(enable = "avx512fp16")]
25168 unsafe fn test_mm512_cvt_roundpd_ph() {
25169 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25170 let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25171 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25172 assert_eq_m128h(r, e);
25173 }
25174
25175 #[simd_test(enable = "avx512fp16")]
25176 unsafe fn test_mm512_mask_cvt_roundpd_ph() {
25177 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25178 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25179 let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25180 src, 0b01010101, a,
25181 );
25182 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25183 assert_eq_m128h(r, e);
25184 }
25185
25186 #[simd_test(enable = "avx512fp16")]
25187 unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
25188 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25189 let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25190 0b01010101, a,
25191 );
25192 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25193 assert_eq_m128h(r, e);
25194 }
25195
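    // Scalar f64 -> f16 mirrors the `cvtss_sh` tests above.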
25196 #[simd_test(enable = "avx512fp16")]
25197 unsafe fn test_mm_cvtsd_sh() {
25198 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25199 let b = _mm_setr_pd(1.0, 2.0);
25200 let r = _mm_cvtsd_sh(a, b);
25201 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25202 assert_eq_m128h(r, e);
25203 }
25204
25205 #[simd_test(enable = "avx512fp16")]
25206 unsafe fn test_mm_mask_cvtsd_sh() {
25207 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25208 let b = _mm_setr_pd(1.0, 2.0);
25209 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25210 let r = _mm_mask_cvtsd_sh(src, 0, a, b);
25211 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25212 assert_eq_m128h(r, e);
25213 let r = _mm_mask_cvtsd_sh(src, 1, a, b);
25214 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25215 assert_eq_m128h(r, e);
25216 }
25217
25218 #[simd_test(enable = "avx512fp16")]
25219 unsafe fn test_mm_maskz_cvtsd_sh() {
25220 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25221 let b = _mm_setr_pd(1.0, 2.0);
25222 let r = _mm_maskz_cvtsd_sh(0, a, b);
25223 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25224 assert_eq_m128h(r, e);
25225 let r = _mm_maskz_cvtsd_sh(1, a, b);
25226 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25227 assert_eq_m128h(r, e);
25228 }
25229
25230 #[simd_test(enable = "avx512fp16")]
25231 unsafe fn test_mm_cvt_roundsd_sh() {
25232 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25233 let b = _mm_setr_pd(1.0, 2.0);
25234 let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
25235 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25236 assert_eq_m128h(r, e);
25237 }
25238
25239 #[simd_test(enable = "avx512fp16")]
25240 unsafe fn test_mm_mask_cvt_roundsd_sh() {
25241 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25242 let b = _mm_setr_pd(1.0, 2.0);
25243 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25244 let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25245 src, 0, a, b,
25246 );
25247 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25248 assert_eq_m128h(r, e);
25249 let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25250 src, 1, a, b,
25251 );
25252 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25253 assert_eq_m128h(r, e);
25254 }
25255
25256 #[simd_test(enable = "avx512fp16")]
25257 unsafe fn test_mm_maskz_cvt_roundsd_sh() {
25258 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25259 let b = _mm_setr_pd(1.0, 2.0);
25260 let r =
25261 _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
25262 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25263 assert_eq_m128h(r, e);
25264 let r =
25265 _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
25266 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25267 assert_eq_m128h(r, e);
25268 }
25269
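    // Packed f16 -> i16 conversions; unlike the `cvtt` (truncating) forms tested further below,
    // these use the current rounding mode, which makes no difference for the integral inputs
    // used here.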
25270 #[simd_test(enable = "avx512fp16,avx512vl")]
25271 unsafe fn test_mm_cvtph_epi16() {
25272 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtph_epi16(a);
25274 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25275 assert_eq_m128i(r, e);
25276 }
25277
25278 #[simd_test(enable = "avx512fp16,avx512vl")]
25279 unsafe fn test_mm_mask_cvtph_epi16() {
25280 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25281 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm_mask_cvtph_epi16(src, 0b01010101, a);
25283 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25284 assert_eq_m128i(r, e);
25285 }
25286
25287 #[simd_test(enable = "avx512fp16,avx512vl")]
25288 unsafe fn test_mm_maskz_cvtph_epi16() {
25289 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_maskz_cvtph_epi16(0b01010101, a);
25291 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25292 assert_eq_m128i(r, e);
25293 }
25294
25295 #[simd_test(enable = "avx512fp16,avx512vl")]
25296 unsafe fn test_mm256_cvtph_epi16() {
25297 let a = _mm256_set_ph(
25298 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25299 );
        let r = _mm256_cvtph_epi16(a);
25301 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25302 assert_eq_m256i(r, e);
25303 }
25304
25305 #[simd_test(enable = "avx512fp16,avx512vl")]
25306 unsafe fn test_mm256_mask_cvtph_epi16() {
25307 let a = _mm256_set_ph(
25308 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25309 );
25310 let src = _mm256_set_epi16(
25311 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25312 );
        let r = _mm256_mask_cvtph_epi16(src, 0b0101010101010101, a);
25314 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25315 assert_eq_m256i(r, e);
25316 }
25317
25318 #[simd_test(enable = "avx512fp16,avx512vl")]
25319 unsafe fn test_mm256_maskz_cvtph_epi16() {
25320 let a = _mm256_set_ph(
25321 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25322 );
        let r = _mm256_maskz_cvtph_epi16(0b0101010101010101, a);
25324 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25325 assert_eq_m256i(r, e);
25326 }
25327
25328 #[simd_test(enable = "avx512fp16")]
25329 unsafe fn test_mm512_cvtph_epi16() {
25330 let a = _mm512_set_ph(
25331 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25332 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25333 31.0, 32.0,
25334 );
        let r = _mm512_cvtph_epi16(a);
25336 let e = _mm512_set_epi16(
25337 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25338 25, 26, 27, 28, 29, 30, 31, 32,
25339 );
25340 assert_eq_m512i(r, e);
25341 }
25342
25343 #[simd_test(enable = "avx512fp16")]
25344 unsafe fn test_mm512_mask_cvtph_epi16() {
25345 let a = _mm512_set_ph(
25346 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25347 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25348 31.0, 32.0,
25349 );
25350 let src = _mm512_set_epi16(
25351 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25352 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25353 );
        let r = _mm512_mask_cvtph_epi16(src, 0b01010101010101010101010101010101, a);
25355 let e = _mm512_set_epi16(
25356 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25357 24, 34, 26, 36, 28, 38, 30, 40, 32,
25358 );
25359 assert_eq_m512i(r, e);
25360 }
25361
25362 #[simd_test(enable = "avx512fp16")]
25363 unsafe fn test_mm512_maskz_cvtph_epi16() {
25364 let a = _mm512_set_ph(
25365 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25366 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25367 31.0, 32.0,
25368 );
        let r = _mm512_maskz_cvtph_epi16(0b01010101010101010101010101010101, a);
25370 let e = _mm512_set_epi16(
25371 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25372 0, 28, 0, 30, 0, 32,
25373 );
25374 assert_eq_m512i(r, e);
25375 }
25376
25377 #[simd_test(enable = "avx512fp16")]
25378 unsafe fn test_mm512_cvt_roundph_epi16() {
25379 let a = _mm512_set_ph(
25380 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25381 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25382 31.0, 32.0,
25383 );
        let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25385 let e = _mm512_set_epi16(
25386 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25387 25, 26, 27, 28, 29, 30, 31, 32,
25388 );
25389 assert_eq_m512i(r, e);
25390 }
25391
25392 #[simd_test(enable = "avx512fp16")]
25393 unsafe fn test_mm512_mask_cvt_roundph_epi16() {
25394 let a = _mm512_set_ph(
25395 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25396 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25397 31.0, 32.0,
25398 );
25399 let src = _mm512_set_epi16(
25400 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25401 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25402 );
        let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25404 src,
25405 0b01010101010101010101010101010101,
25406 a,
25407 );
25408 let e = _mm512_set_epi16(
25409 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25410 24, 34, 26, 36, 28, 38, 30, 40, 32,
25411 );
25412 assert_eq_m512i(r, e);
25413 }
25414
25415 #[simd_test(enable = "avx512fp16")]
25416 unsafe fn test_mm512_maskz_cvt_roundph_epi16() {
25417 let a = _mm512_set_ph(
25418 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25419 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25420 31.0, 32.0,
25421 );
        let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25423 0b01010101010101010101010101010101,
25424 a,
25425 );
25426 let e = _mm512_set_epi16(
25427 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25428 0, 28, 0, 30, 0, 32,
25429 );
25430 assert_eq_m512i(r, e);
25431 }
25432
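    // As above, but converting to unsigned 16-bit integers; the inputs are small positive
    // values, so the results match the signed case.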
25433 #[simd_test(enable = "avx512fp16,avx512vl")]
25434 unsafe fn test_mm_cvtph_epu16() {
25435 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtph_epu16(a);
25437 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25438 assert_eq_m128i(r, e);
25439 }
25440
25441 #[simd_test(enable = "avx512fp16,avx512vl")]
25442 unsafe fn test_mm_mask_cvtph_epu16() {
25443 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25444 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm_mask_cvtph_epu16(src, 0b01010101, a);
25446 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25447 assert_eq_m128i(r, e);
25448 }
25449
25450 #[simd_test(enable = "avx512fp16,avx512vl")]
25451 unsafe fn test_mm_maskz_cvtph_epu16() {
25452 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_maskz_cvtph_epu16(0b01010101, a);
25454 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25455 assert_eq_m128i(r, e);
25456 }
25457
25458 #[simd_test(enable = "avx512fp16,avx512vl")]
25459 unsafe fn test_mm256_cvtph_epu16() {
25460 let a = _mm256_set_ph(
25461 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25462 );
        let r = _mm256_cvtph_epu16(a);
25464 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25465 assert_eq_m256i(r, e);
25466 }
25467
25468 #[simd_test(enable = "avx512fp16,avx512vl")]
25469 unsafe fn test_mm256_mask_cvtph_epu16() {
25470 let a = _mm256_set_ph(
25471 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25472 );
25473 let src = _mm256_set_epi16(
25474 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25475 );
        let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a);
25477 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25478 assert_eq_m256i(r, e);
25479 }
25480
25481 #[simd_test(enable = "avx512fp16,avx512vl")]
25482 unsafe fn test_mm256_maskz_cvtph_epu16() {
25483 let a = _mm256_set_ph(
25484 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25485 );
        let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a);
25487 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25488 assert_eq_m256i(r, e);
25489 }
25490
25491 #[simd_test(enable = "avx512fp16")]
25492 unsafe fn test_mm512_cvtph_epu16() {
25493 let a = _mm512_set_ph(
25494 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25495 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25496 31.0, 32.0,
25497 );
        let r = _mm512_cvtph_epu16(a);
25499 let e = _mm512_set_epi16(
25500 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25501 25, 26, 27, 28, 29, 30, 31, 32,
25502 );
25503 assert_eq_m512i(r, e);
25504 }
25505
25506 #[simd_test(enable = "avx512fp16")]
25507 unsafe fn test_mm512_mask_cvtph_epu16() {
25508 let a = _mm512_set_ph(
25509 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25510 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25511 31.0, 32.0,
25512 );
25513 let src = _mm512_set_epi16(
25514 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25515 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25516 );
        let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a);
25518 let e = _mm512_set_epi16(
25519 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25520 24, 34, 26, 36, 28, 38, 30, 40, 32,
25521 );
25522 assert_eq_m512i(r, e);
25523 }
25524
25525 #[simd_test(enable = "avx512fp16")]
25526 unsafe fn test_mm512_maskz_cvtph_epu16() {
25527 let a = _mm512_set_ph(
25528 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25529 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25530 31.0, 32.0,
25531 );
        let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a);
25533 let e = _mm512_set_epi16(
25534 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25535 0, 28, 0, 30, 0, 32,
25536 );
25537 assert_eq_m512i(r, e);
25538 }
25539
25540 #[simd_test(enable = "avx512fp16")]
25541 unsafe fn test_mm512_cvt_roundph_epu16() {
25542 let a = _mm512_set_ph(
25543 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25544 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25545 31.0, 32.0,
25546 );
25547 let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25548 let e = _mm512_set_epi16(
25549 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25550 25, 26, 27, 28, 29, 30, 31, 32,
25551 );
25552 assert_eq_m512i(r, e);
25553 }
25554
25555 #[simd_test(enable = "avx512fp16")]
25556 unsafe fn test_mm512_mask_cvt_roundph_epu16() {
25557 let a = _mm512_set_ph(
25558 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25559 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25560 31.0, 32.0,
25561 );
25562 let src = _mm512_set_epi16(
25563 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25564 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25565 );
25566 let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25567 src,
25568 0b01010101010101010101010101010101,
25569 a,
25570 );
25571 let e = _mm512_set_epi16(
25572 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25573 24, 34, 26, 36, 28, 38, 30, 40, 32,
25574 );
25575 assert_eq_m512i(r, e);
25576 }
25577
25578 #[simd_test(enable = "avx512fp16")]
25579 unsafe fn test_mm512_maskz_cvt_roundph_epu16() {
25580 let a = _mm512_set_ph(
25581 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25582 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25583 31.0, 32.0,
25584 );
25585 let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25586 0b01010101010101010101010101010101,
25587 a,
25588 );
25589 let e = _mm512_set_epi16(
25590 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25591 0, 28, 0, 30, 0, 32,
25592 );
25593 assert_eq_m512i(r, e);
25594 }
25595
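    // The `cvtt` forms truncate toward zero instead of using the rounding mode; with integral
    // inputs they produce the same results as the rounding forms tested above.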
25596 #[simd_test(enable = "avx512fp16,avx512vl")]
25597 unsafe fn test_mm_cvttph_epi16() {
25598 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25599 let r = _mm_cvttph_epi16(a);
25600 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25601 assert_eq_m128i(r, e);
25602 }
25603
25604 #[simd_test(enable = "avx512fp16,avx512vl")]
25605 unsafe fn test_mm_mask_cvttph_epi16() {
25606 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25607 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25608 let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
25609 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25610 assert_eq_m128i(r, e);
25611 }
25612
25613 #[simd_test(enable = "avx512fp16,avx512vl")]
25614 unsafe fn test_mm_maskz_cvttph_epi16() {
25615 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25616 let r = _mm_maskz_cvttph_epi16(0b01010101, a);
25617 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25618 assert_eq_m128i(r, e);
25619 }
25620
25621 #[simd_test(enable = "avx512fp16,avx512vl")]
25622 unsafe fn test_mm256_cvttph_epi16() {
25623 let a = _mm256_set_ph(
25624 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25625 );
25626 let r = _mm256_cvttph_epi16(a);
25627 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25628 assert_eq_m256i(r, e);
25629 }
25630
25631 #[simd_test(enable = "avx512fp16,avx512vl")]
25632 unsafe fn test_mm256_mask_cvttph_epi16() {
25633 let a = _mm256_set_ph(
25634 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25635 );
25636 let src = _mm256_set_epi16(
25637 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25638 );
25639 let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
25640 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25641 assert_eq_m256i(r, e);
25642 }
25643
25644 #[simd_test(enable = "avx512fp16,avx512vl")]
25645 unsafe fn test_mm256_maskz_cvttph_epi16() {
25646 let a = _mm256_set_ph(
25647 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25648 );
25649 let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
25650 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25651 assert_eq_m256i(r, e);
25652 }
25653
25654 #[simd_test(enable = "avx512fp16")]
25655 unsafe fn test_mm512_cvttph_epi16() {
25656 let a = _mm512_set_ph(
25657 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25658 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25659 31.0, 32.0,
25660 );
25661 let r = _mm512_cvttph_epi16(a);
25662 let e = _mm512_set_epi16(
25663 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25664 25, 26, 27, 28, 29, 30, 31, 32,
25665 );
25666 assert_eq_m512i(r, e);
25667 }
25668
25669 #[simd_test(enable = "avx512fp16")]
25670 unsafe fn test_mm512_mask_cvttph_epi16() {
25671 let a = _mm512_set_ph(
25672 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25673 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25674 31.0, 32.0,
25675 );
25676 let src = _mm512_set_epi16(
25677 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25678 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25679 );
25680 let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
25681 let e = _mm512_set_epi16(
25682 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25683 24, 34, 26, 36, 28, 38, 30, 40, 32,
25684 );
25685 assert_eq_m512i(r, e);
25686 }
25687
25688 #[simd_test(enable = "avx512fp16")]
25689 unsafe fn test_mm512_maskz_cvttph_epi16() {
25690 let a = _mm512_set_ph(
25691 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25692 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25693 31.0, 32.0,
25694 );
25695 let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
25696 let e = _mm512_set_epi16(
25697 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25698 0, 28, 0, 30, 0, 32,
25699 );
25700 assert_eq_m512i(r, e);
25701 }
25702
25703 #[simd_test(enable = "avx512fp16")]
25704 unsafe fn test_mm512_cvtt_roundph_epi16() {
25705 let a = _mm512_set_ph(
25706 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25707 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25708 31.0, 32.0,
25709 );
25710 let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
25711 let e = _mm512_set_epi16(
25712 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25713 25, 26, 27, 28, 29, 30, 31, 32,
25714 );
25715 assert_eq_m512i(r, e);
25716 }
25717
25718 #[simd_test(enable = "avx512fp16")]
25719 unsafe fn test_mm512_mask_cvtt_roundph_epi16() {
25720 let a = _mm512_set_ph(
25721 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25722 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25723 31.0, 32.0,
25724 );
25725 let src = _mm512_set_epi16(
25726 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25727 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25728 );
25729 let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25730 src,
25731 0b01010101010101010101010101010101,
25732 a,
25733 );
25734 let e = _mm512_set_epi16(
25735 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25736 24, 34, 26, 36, 28, 38, 30, 40, 32,
25737 );
25738 assert_eq_m512i(r, e);
25739 }
25740
25741 #[simd_test(enable = "avx512fp16")]
25742 unsafe fn test_mm512_maskz_cvtt_roundph_epi16() {
25743 let a = _mm512_set_ph(
25744 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25745 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25746 31.0, 32.0,
25747 );
25748 let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25749 0b01010101010101010101010101010101,
25750 a,
25751 );
25752 let e = _mm512_set_epi16(
25753 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25754 0, 28, 0, 30, 0, 32,
25755 );
25756 assert_eq_m512i(r, e);
25757 }
25758
25759 #[simd_test(enable = "avx512fp16,avx512vl")]
25760 unsafe fn test_mm_cvttph_epu16() {
25761 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25762 let r = _mm_cvttph_epu16(a);
25763 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25764 assert_eq_m128i(r, e);
25765 }
25766
25767 #[simd_test(enable = "avx512fp16,avx512vl")]
25768 unsafe fn test_mm_mask_cvttph_epu16() {
25769 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25770 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25771 let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
25772 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25773 assert_eq_m128i(r, e);
25774 }
25775
25776 #[simd_test(enable = "avx512fp16,avx512vl")]
25777 unsafe fn test_mm_maskz_cvttph_epu16() {
25778 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25779 let r = _mm_maskz_cvttph_epu16(0b01010101, a);
25780 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25781 assert_eq_m128i(r, e);
25782 }
25783
25784 #[simd_test(enable = "avx512fp16,avx512vl")]
25785 unsafe fn test_mm256_cvttph_epu16() {
25786 let a = _mm256_set_ph(
25787 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25788 );
25789 let r = _mm256_cvttph_epu16(a);
25790 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25791 assert_eq_m256i(r, e);
25792 }
25793
25794 #[simd_test(enable = "avx512fp16,avx512vl")]
25795 unsafe fn test_mm256_mask_cvttph_epu16() {
25796 let a = _mm256_set_ph(
25797 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25798 );
25799 let src = _mm256_set_epi16(
25800 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25801 );
25802 let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a);
25803 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25804 assert_eq_m256i(r, e);
25805 }
25806
25807 #[simd_test(enable = "avx512fp16,avx512vl")]
25808 unsafe fn test_mm256_maskz_cvttph_epu16() {
25809 let a = _mm256_set_ph(
25810 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25811 );
25812 let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a);
25813 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25814 assert_eq_m256i(r, e);
25815 }
25816
25817 #[simd_test(enable = "avx512fp16")]
25818 unsafe fn test_mm512_cvttph_epu16() {
25819 let a = _mm512_set_ph(
25820 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25821 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25822 31.0, 32.0,
25823 );
25824 let r = _mm512_cvttph_epu16(a);
25825 let e = _mm512_set_epi16(
25826 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25827 25, 26, 27, 28, 29, 30, 31, 32,
25828 );
25829 assert_eq_m512i(r, e);
25830 }
25831
25832 #[simd_test(enable = "avx512fp16")]
25833 unsafe fn test_mm512_mask_cvttph_epu16() {
25834 let a = _mm512_set_ph(
25835 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25836 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25837 31.0, 32.0,
25838 );
25839 let src = _mm512_set_epi16(
25840 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25841 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25842 );
25843 let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a);
25844 let e = _mm512_set_epi16(
25845 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25846 24, 34, 26, 36, 28, 38, 30, 40, 32,
25847 );
25848 assert_eq_m512i(r, e);
25849 }
25850
25851 #[simd_test(enable = "avx512fp16")]
25852 unsafe fn test_mm512_maskz_cvttph_epu16() {
25853 let a = _mm512_set_ph(
25854 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25855 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25856 31.0, 32.0,
25857 );
25858 let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a);
25859 let e = _mm512_set_epi16(
25860 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25861 0, 28, 0, 30, 0, 32,
25862 );
25863 assert_eq_m512i(r, e);
25864 }
25865
25866 #[simd_test(enable = "avx512fp16")]
25867 unsafe fn test_mm512_cvtt_roundph_epu16() {
25868 let a = _mm512_set_ph(
25869 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25870 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25871 31.0, 32.0,
25872 );
25873 let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
25874 let e = _mm512_set_epi16(
25875 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25876 25, 26, 27, 28, 29, 30, 31, 32,
25877 );
25878 assert_eq_m512i(r, e);
25879 }
25880
25881 #[simd_test(enable = "avx512fp16")]
25882 unsafe fn test_mm512_mask_cvtt_roundph_epu16() {
25883 let a = _mm512_set_ph(
25884 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25885 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25886 31.0, 32.0,
25887 );
25888 let src = _mm512_set_epi16(
25889 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25890 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25891 );
25892 let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
25893 src,
25894 0b01010101010101010101010101010101,
25895 a,
25896 );
25897 let e = _mm512_set_epi16(
25898 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25899 24, 34, 26, 36, 28, 38, 30, 40, 32,
25900 );
25901 assert_eq_m512i(r, e);
25902 }
25903
25904 #[simd_test(enable = "avx512fp16")]
25905 unsafe fn test_mm512_maskz_cvtt_roundph_epu16() {
25906 let a = _mm512_set_ph(
25907 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25908 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25909 31.0, 32.0,
25910 );
25911 let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
25912 0b01010101010101010101010101010101,
25913 a,
25914 );
25915 let e = _mm512_set_epi16(
25916 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25917 0, 28, 0, 30, 0, 32,
25918 );
25919 assert_eq_m512i(r, e);
25920 }
25921
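    // f16 -> i32 conversions widen, so only as many low f16 lanes are read as fit the integer
    // destination: four for __m128i, eight for __m256i, and sixteen for __m512i.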
25922 #[simd_test(enable = "avx512fp16,avx512vl")]
25923 unsafe fn test_mm_cvtph_epi32() {
25924 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25925 let r = _mm_cvtph_epi32(a);
25926 let e = _mm_set_epi32(1, 2, 3, 4);
25927 assert_eq_m128i(r, e);
25928 }
25929
25930 #[simd_test(enable = "avx512fp16,avx512vl")]
25931 unsafe fn test_mm_mask_cvtph_epi32() {
25932 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25933 let src = _mm_set_epi32(10, 11, 12, 13);
25934 let r = _mm_mask_cvtph_epi32(src, 0b0101, a);
25935 let e = _mm_set_epi32(10, 2, 12, 4);
25936 assert_eq_m128i(r, e);
25937 }
25938
25939 #[simd_test(enable = "avx512fp16,avx512vl")]
25940 unsafe fn test_mm_maskz_cvtph_epi32() {
25941 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25942 let r = _mm_maskz_cvtph_epi32(0b0101, a);
25943 let e = _mm_set_epi32(0, 2, 0, 4);
25944 assert_eq_m128i(r, e);
25945 }
25946
25947 #[simd_test(enable = "avx512fp16,avx512vl")]
25948 unsafe fn test_mm256_cvtph_epi32() {
25949 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25950 let r = _mm256_cvtph_epi32(a);
25951 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
25952 assert_eq_m256i(r, e);
25953 }
25954
25955 #[simd_test(enable = "avx512fp16,avx512vl")]
25956 unsafe fn test_mm256_mask_cvtph_epi32() {
25957 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25958 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
25959 let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a);
25960 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
25961 assert_eq_m256i(r, e);
25962 }
25963
25964 #[simd_test(enable = "avx512fp16,avx512vl")]
25965 unsafe fn test_mm256_maskz_cvtph_epi32() {
25966 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25967 let r = _mm256_maskz_cvtph_epi32(0b01010101, a);
25968 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
25969 assert_eq_m256i(r, e);
25970 }
25971
25972 #[simd_test(enable = "avx512fp16")]
25973 unsafe fn test_mm512_cvtph_epi32() {
25974 let a = _mm256_set_ph(
25975 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25976 );
25977 let r = _mm512_cvtph_epi32(a);
25978 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25979 assert_eq_m512i(r, e);
25980 }
25981
25982 #[simd_test(enable = "avx512fp16")]
25983 unsafe fn test_mm512_mask_cvtph_epi32() {
25984 let a = _mm256_set_ph(
25985 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25986 );
25987 let src = _mm512_set_epi32(
25988 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25989 );
25990 let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a);
25991 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25992 assert_eq_m512i(r, e);
25993 }
25994
25995 #[simd_test(enable = "avx512fp16")]
25996 unsafe fn test_mm512_maskz_cvtph_epi32() {
25997 let a = _mm256_set_ph(
25998 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25999 );
26000 let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a);
26001 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26002 assert_eq_m512i(r, e);
26003 }
26004
26005 #[simd_test(enable = "avx512fp16")]
26006 unsafe fn test_mm512_cvt_roundph_epi32() {
26007 let a = _mm256_set_ph(
26008 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26009 );
26010 let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26011 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26012 assert_eq_m512i(r, e);
26013 }
26014
26015 #[simd_test(enable = "avx512fp16")]
26016 unsafe fn test_mm512_mask_cvt_roundph_epi32() {
26017 let a = _mm256_set_ph(
26018 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26019 );
26020 let src = _mm512_set_epi32(
26021 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26022 );
26023 let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26024 src,
26025 0b0101010101010101,
26026 a,
26027 );
26028 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26029 assert_eq_m512i(r, e);
26030 }
26031
26032 #[simd_test(enable = "avx512fp16")]
26033 unsafe fn test_mm512_maskz_cvt_roundph_epi32() {
26034 let a = _mm256_set_ph(
26035 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26036 );
26037 let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26038 0b0101010101010101,
26039 a,
26040 );
26041 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26042 assert_eq_m512i(r, e);
26043 }
26044
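    // Scalar f16 -> i32 reads only the lowest lane of the source vector.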
26045 #[simd_test(enable = "avx512fp16")]
26046 unsafe fn test_mm_cvtsh_i32() {
26047 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26048 let r = _mm_cvtsh_i32(a);
26049 assert_eq!(r, 1);
26050 }
26051
26052 #[simd_test(enable = "avx512fp16")]
26053 unsafe fn test_mm_cvt_roundsh_i32() {
26054 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26055 let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26056 assert_eq!(r, 1);
26057 }
26058
26059 #[simd_test(enable = "avx512fp16,avx512vl")]
26060 unsafe fn test_mm_cvtph_epu32() {
26061 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26062 let r = _mm_cvtph_epu32(a);
26063 let e = _mm_set_epi32(1, 2, 3, 4);
26064 assert_eq_m128i(r, e);
26065 }
26066
26067 #[simd_test(enable = "avx512fp16,avx512vl")]
26068 unsafe fn test_mm_mask_cvtph_epu32() {
26069 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26070 let src = _mm_set_epi32(10, 11, 12, 13);
26071 let r = _mm_mask_cvtph_epu32(src, 0b0101, a);
26072 let e = _mm_set_epi32(10, 2, 12, 4);
26073 assert_eq_m128i(r, e);
26074 }
26075
26076 #[simd_test(enable = "avx512fp16,avx512vl")]
26077 unsafe fn test_mm_maskz_cvtph_epu32() {
26078 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26079 let r = _mm_maskz_cvtph_epu32(0b0101, a);
26080 let e = _mm_set_epi32(0, 2, 0, 4);
26081 assert_eq_m128i(r, e);
26082 }
26083
26084 #[simd_test(enable = "avx512fp16,avx512vl")]
26085 unsafe fn test_mm256_cvtph_epu32() {
26086 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26087 let r = _mm256_cvtph_epu32(a);
26088 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26089 assert_eq_m256i(r, e);
26090 }
26091
26092 #[simd_test(enable = "avx512fp16,avx512vl")]
26093 unsafe fn test_mm256_mask_cvtph_epu32() {
26094 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26095 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26096 let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a);
26097 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26098 assert_eq_m256i(r, e);
26099 }
26100
26101 #[simd_test(enable = "avx512fp16,avx512vl")]
26102 unsafe fn test_mm256_maskz_cvtph_epu32() {
26103 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26104 let r = _mm256_maskz_cvtph_epu32(0b01010101, a);
26105 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26106 assert_eq_m256i(r, e);
26107 }
26108
26109 #[simd_test(enable = "avx512fp16")]
26110 unsafe fn test_mm512_cvtph_epu32() {
26111 let a = _mm256_set_ph(
26112 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26113 );
26114 let r = _mm512_cvtph_epu32(a);
26115 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26116 assert_eq_m512i(r, e);
26117 }
26118
26119 #[simd_test(enable = "avx512fp16")]
26120 unsafe fn test_mm512_mask_cvtph_epu32() {
26121 let a = _mm256_set_ph(
26122 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26123 );
26124 let src = _mm512_set_epi32(
26125 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26126 );
26127 let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a);
26128 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26129 assert_eq_m512i(r, e);
26130 }
26131
26132 #[simd_test(enable = "avx512fp16")]
26133 unsafe fn test_mm512_maskz_cvtph_epu32() {
26134 let a = _mm256_set_ph(
26135 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26136 );
26137 let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a);
26138 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26139 assert_eq_m512i(r, e);
26140 }
26141
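// The explicit-rounding `_round` variants are driven with
// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`, i.e. round to nearest (ties to even) with
// floating-point exceptions suppressed, so these whole-number inputs convert exactly as above.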
26142 #[simd_test(enable = "avx512fp16")]
26143 unsafe fn test_mm512_cvt_roundph_epu32() {
26144 let a = _mm256_set_ph(
26145 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26146 );
26147 let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26148 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26149 assert_eq_m512i(r, e);
26150 }
26151
26152 #[simd_test(enable = "avx512fp16")]
26153 unsafe fn test_mm512_mask_cvt_roundph_epu32() {
26154 let a = _mm256_set_ph(
26155 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26156 );
26157 let src = _mm512_set_epi32(
26158 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26159 );
26160 let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26161 src,
26162 0b0101010101010101,
26163 a,
26164 );
26165 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26166 assert_eq_m512i(r, e);
26167 }
26168
26169 #[simd_test(enable = "avx512fp16")]
26170 unsafe fn test_mm512_maskz_cvt_roundph_epu32() {
26171 let a = _mm256_set_ph(
26172 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26173 );
26174 let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26175 0b0101010101010101,
26176 a,
26177 );
26178 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26179 assert_eq_m512i(r, e);
26180 }
26181
26182 #[simd_test(enable = "avx512fp16")]
26183 unsafe fn test_mm_cvtsh_u32() {
26184 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26185 let r = _mm_cvtsh_u32(a);
26186 assert_eq!(r, 1);
26187 }
26188
26189 #[simd_test(enable = "avx512fp16")]
26190 unsafe fn test_mm_cvt_roundsh_u32() {
26191 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26192 let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26193 assert_eq!(r, 1);
26194 }
26195
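// The `cvtt*` variants convert with truncation (round toward zero); because every input is
// already a whole number, the expected values match those of the rounding tests above.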
26196 #[simd_test(enable = "avx512fp16,avx512vl")]
26197 unsafe fn test_mm_cvttph_epi32() {
26198 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26199 let r = _mm_cvttph_epi32(a);
26200 let e = _mm_set_epi32(1, 2, 3, 4);
26201 assert_eq_m128i(r, e);
26202 }
26203
26204 #[simd_test(enable = "avx512fp16,avx512vl")]
26205 unsafe fn test_mm_mask_cvttph_epi32() {
26206 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26207 let src = _mm_set_epi32(10, 11, 12, 13);
26208 let r = _mm_mask_cvttph_epi32(src, 0b0101, a);
26209 let e = _mm_set_epi32(10, 2, 12, 4);
26210 assert_eq_m128i(r, e);
26211 }
26212
26213 #[simd_test(enable = "avx512fp16,avx512vl")]
26214 unsafe fn test_mm_maskz_cvttph_epi32() {
26215 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26216 let r = _mm_maskz_cvttph_epi32(0b0101, a);
26217 let e = _mm_set_epi32(0, 2, 0, 4);
26218 assert_eq_m128i(r, e);
26219 }
26220
26221 #[simd_test(enable = "avx512fp16,avx512vl")]
26222 unsafe fn test_mm256_cvttph_epi32() {
26223 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26224 let r = _mm256_cvttph_epi32(a);
26225 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26226 assert_eq_m256i(r, e);
26227 }
26228
26229 #[simd_test(enable = "avx512fp16,avx512vl")]
26230 unsafe fn test_mm256_mask_cvttph_epi32() {
26231 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26232 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26233 let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a);
26234 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26235 assert_eq_m256i(r, e);
26236 }
26237
26238 #[simd_test(enable = "avx512fp16,avx512vl")]
26239 unsafe fn test_mm256_maskz_cvttph_epi32() {
26240 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26241 let r = _mm256_maskz_cvttph_epi32(0b01010101, a);
26242 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26243 assert_eq_m256i(r, e);
26244 }
26245
26246 #[simd_test(enable = "avx512fp16")]
26247 unsafe fn test_mm512_cvttph_epi32() {
26248 let a = _mm256_set_ph(
26249 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26250 );
26251 let r = _mm512_cvttph_epi32(a);
26252 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26253 assert_eq_m512i(r, e);
26254 }
26255
26256 #[simd_test(enable = "avx512fp16")]
26257 unsafe fn test_mm512_mask_cvttph_epi32() {
26258 let a = _mm256_set_ph(
26259 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26260 );
26261 let src = _mm512_set_epi32(
26262 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26263 );
26264 let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a);
26265 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26266 assert_eq_m512i(r, e);
26267 }
26268
26269 #[simd_test(enable = "avx512fp16")]
26270 unsafe fn test_mm512_maskz_cvttph_epi32() {
26271 let a = _mm256_set_ph(
26272 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26273 );
26274 let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a);
26275 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26276 assert_eq_m512i(r, e);
26277 }
26278
26279 #[simd_test(enable = "avx512fp16")]
26280 unsafe fn test_mm512_cvtt_roundph_epi32() {
26281 let a = _mm256_set_ph(
26282 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26283 );
26284 let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
26285 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26286 assert_eq_m512i(r, e);
26287 }
26288
26289 #[simd_test(enable = "avx512fp16")]
26290 unsafe fn test_mm512_mask_cvtt_roundph_epi32() {
26291 let a = _mm256_set_ph(
26292 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26293 );
26294 let src = _mm512_set_epi32(
26295 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26296 );
26297 let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
26298 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26299 assert_eq_m512i(r, e);
26300 }
26301
26302 #[simd_test(enable = "avx512fp16")]
26303 unsafe fn test_mm512_maskz_cvtt_roundph_epi32() {
26304 let a = _mm256_set_ph(
26305 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26306 );
26307 let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
26308 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26309 assert_eq_m512i(r, e);
26310 }
26311
26312 #[simd_test(enable = "avx512fp16")]
26313 unsafe fn test_mm_cvttsh_i32() {
26314 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26315 let r = _mm_cvttsh_i32(a);
26316 assert_eq!(r, 1);
26317 }
26318
26319 #[simd_test(enable = "avx512fp16")]
26320 unsafe fn test_mm_cvtt_roundsh_i32() {
26321 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26322 let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
26323 assert_eq!(r, 1);
26324 }
26325
26326 #[simd_test(enable = "avx512fp16,avx512vl")]
26327 unsafe fn test_mm_cvttph_epu32() {
26328 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26329 let r = _mm_cvttph_epu32(a);
26330 let e = _mm_set_epi32(1, 2, 3, 4);
26331 assert_eq_m128i(r, e);
26332 }
26333
26334 #[simd_test(enable = "avx512fp16,avx512vl")]
26335 unsafe fn test_mm_mask_cvttph_epu32() {
26336 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26337 let src = _mm_set_epi32(10, 11, 12, 13);
26338 let r = _mm_mask_cvttph_epu32(src, 0b0101, a);
26339 let e = _mm_set_epi32(10, 2, 12, 4);
26340 assert_eq_m128i(r, e);
26341 }
26342
26343 #[simd_test(enable = "avx512fp16,avx512vl")]
26344 unsafe fn test_mm_maskz_cvttph_epu32() {
26345 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26346 let r = _mm_maskz_cvttph_epu32(0b0101, a);
26347 let e = _mm_set_epi32(0, 2, 0, 4);
26348 assert_eq_m128i(r, e);
26349 }
26350
26351 #[simd_test(enable = "avx512fp16,avx512vl")]
26352 unsafe fn test_mm256_cvttph_epu32() {
26353 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26354 let r = _mm256_cvttph_epu32(a);
26355 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
26356 assert_eq_m256i(r, e);
26357 }
26358
26359 #[simd_test(enable = "avx512fp16,avx512vl")]
26360 unsafe fn test_mm256_mask_cvttph_epu32() {
26361 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26362 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
26363 let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a);
26364 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
26365 assert_eq_m256i(r, e);
26366 }
26367
26368 #[simd_test(enable = "avx512fp16,avx512vl")]
26369 unsafe fn test_mm256_maskz_cvttph_epu32() {
26370 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26371 let r = _mm256_maskz_cvttph_epu32(0b01010101, a);
26372 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
26373 assert_eq_m256i(r, e);
26374 }
26375
26376 #[simd_test(enable = "avx512fp16")]
26377 unsafe fn test_mm512_cvttph_epu32() {
26378 let a = _mm256_set_ph(
26379 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26380 );
26381 let r = _mm512_cvttph_epu32(a);
26382 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26383 assert_eq_m512i(r, e);
26384 }
26385
26386 #[simd_test(enable = "avx512fp16")]
26387 unsafe fn test_mm512_mask_cvttph_epu32() {
26388 let a = _mm256_set_ph(
26389 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26390 );
26391 let src = _mm512_set_epi32(
26392 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26393 );
26394 let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a);
26395 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26396 assert_eq_m512i(r, e);
26397 }
26398
26399 #[simd_test(enable = "avx512fp16")]
26400 unsafe fn test_mm512_maskz_cvttph_epu32() {
26401 let a = _mm256_set_ph(
26402 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26403 );
26404 let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a);
26405 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26406 assert_eq_m512i(r, e);
26407 }
26408
26409 #[simd_test(enable = "avx512fp16")]
26410 unsafe fn test_mm512_cvtt_roundph_epu32() {
26411 let a = _mm256_set_ph(
26412 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26413 );
26414 let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a);
26415 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26416 assert_eq_m512i(r, e);
26417 }
26418
26419 #[simd_test(enable = "avx512fp16")]
26420 unsafe fn test_mm512_mask_cvtt_roundph_epu32() {
26421 let a = _mm256_set_ph(
26422 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26423 );
26424 let src = _mm512_set_epi32(
26425 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26426 );
26427 let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
26428 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26429 assert_eq_m512i(r, e);
26430 }
26431
26432 #[simd_test(enable = "avx512fp16")]
26433 unsafe fn test_mm512_maskz_cvtt_roundph_epu32() {
26434 let a = _mm256_set_ph(
26435 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26436 );
26437 let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
26438 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26439 assert_eq_m512i(r, e);
26440 }
26441
26442 #[simd_test(enable = "avx512fp16")]
26443 unsafe fn test_mm_cvttsh_u32() {
26444 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26445 let r = _mm_cvttsh_u32(a);
26446 assert_eq!(r, 1);
26447 }
26448
26449 #[simd_test(enable = "avx512fp16")]
26450 unsafe fn test_mm_cvtt_roundsh_u32() {
26451 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26452 let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
26453 assert_eq!(r, 1);
26454 }
26455
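// f16 -> 64-bit integer conversions: each 64-bit output lane consumes one source lane, so a
// single `__m128h` argument contributes only its low 2, 4 or 8 lanes for the 128-, 256- and
// 512-bit results respectively.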
26456 #[simd_test(enable = "avx512fp16,avx512vl")]
26457 unsafe fn test_mm_cvtph_epi64() {
26458 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26459 let r = _mm_cvtph_epi64(a);
26460 let e = _mm_set_epi64x(1, 2);
26461 assert_eq_m128i(r, e);
26462 }
26463
26464 #[simd_test(enable = "avx512fp16,avx512vl")]
26465 unsafe fn test_mm_mask_cvtph_epi64() {
26466 let src = _mm_set_epi64x(3, 4);
26467 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26468 let r = _mm_mask_cvtph_epi64(src, 0b01, a);
26469 let e = _mm_set_epi64x(3, 2);
26470 assert_eq_m128i(r, e);
26471 }
26472
26473 #[simd_test(enable = "avx512fp16,avx512vl")]
26474 unsafe fn test_mm_maskz_cvtph_epi64() {
26475 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26476 let r = _mm_maskz_cvtph_epi64(0b01, a);
26477 let e = _mm_set_epi64x(0, 2);
26478 assert_eq_m128i(r, e);
26479 }
26480
26481 #[simd_test(enable = "avx512fp16,avx512vl")]
26482 unsafe fn test_mm256_cvtph_epi64() {
26483 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26484 let r = _mm256_cvtph_epi64(a);
26485 let e = _mm256_set_epi64x(1, 2, 3, 4);
26486 assert_eq_m256i(r, e);
26487 }
26488
26489 #[simd_test(enable = "avx512fp16,avx512vl")]
26490 unsafe fn test_mm256_mask_cvtph_epi64() {
26491 let src = _mm256_set_epi64x(5, 6, 7, 8);
26492 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26493 let r = _mm256_mask_cvtph_epi64(src, 0b0101, a);
26494 let e = _mm256_set_epi64x(5, 2, 7, 4);
26495 assert_eq_m256i(r, e);
26496 }
26497
26498 #[simd_test(enable = "avx512fp16,avx512vl")]
26499 unsafe fn test_mm256_maskz_cvtph_epi64() {
26500 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26501 let r = _mm256_maskz_cvtph_epi64(0b0101, a);
26502 let e = _mm256_set_epi64x(0, 2, 0, 4);
26503 assert_eq_m256i(r, e);
26504 }
26505
26506 #[simd_test(enable = "avx512fp16")]
26507 unsafe fn test_mm512_cvtph_epi64() {
26508 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26509 let r = _mm512_cvtph_epi64(a);
26510 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26511 assert_eq_m512i(r, e);
26512 }
26513
26514 #[simd_test(enable = "avx512fp16")]
26515 unsafe fn test_mm512_mask_cvtph_epi64() {
26516 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26517 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26518 let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a);
26519 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26520 assert_eq_m512i(r, e);
26521 }
26522
26523 #[simd_test(enable = "avx512fp16")]
26524 unsafe fn test_mm512_maskz_cvtph_epi64() {
26525 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26526 let r = _mm512_maskz_cvtph_epi64(0b01010101, a);
26527 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26528 assert_eq_m512i(r, e);
26529 }
26530
26531 #[simd_test(enable = "avx512fp16")]
26532 unsafe fn test_mm512_cvt_roundph_epi64() {
26533 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26534 let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26535 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26536 assert_eq_m512i(r, e);
26537 }
26538
26539 #[simd_test(enable = "avx512fp16")]
26540 unsafe fn test_mm512_mask_cvt_roundph_epi64() {
26541 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26542 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26543 let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26544 src, 0b01010101, a,
26545 );
26546 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26547 assert_eq_m512i(r, e);
26548 }
26549
26550 #[simd_test(enable = "avx512fp16")]
26551 unsafe fn test_mm512_maskz_cvt_roundph_epi64() {
26552 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26553 let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26554 0b01010101, a,
26555 );
26556 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26557 assert_eq_m512i(r, e);
26558 }
26559
26560 #[simd_test(enable = "avx512fp16,avx512vl")]
26561 unsafe fn test_mm_cvtph_epu64() {
26562 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26563 let r = _mm_cvtph_epu64(a);
26564 let e = _mm_set_epi64x(1, 2);
26565 assert_eq_m128i(r, e);
26566 }
26567
26568 #[simd_test(enable = "avx512fp16,avx512vl")]
26569 unsafe fn test_mm_mask_cvtph_epu64() {
26570 let src = _mm_set_epi64x(3, 4);
26571 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26572 let r = _mm_mask_cvtph_epu64(src, 0b01, a);
26573 let e = _mm_set_epi64x(3, 2);
26574 assert_eq_m128i(r, e);
26575 }
26576
26577 #[simd_test(enable = "avx512fp16,avx512vl")]
26578 unsafe fn test_mm_maskz_cvtph_epu64() {
26579 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26580 let r = _mm_maskz_cvtph_epu64(0b01, a);
26581 let e = _mm_set_epi64x(0, 2);
26582 assert_eq_m128i(r, e);
26583 }
26584
26585 #[simd_test(enable = "avx512fp16,avx512vl")]
26586 unsafe fn test_mm256_cvtph_epu64() {
26587 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26588 let r = _mm256_cvtph_epu64(a);
26589 let e = _mm256_set_epi64x(1, 2, 3, 4);
26590 assert_eq_m256i(r, e);
26591 }
26592
26593 #[simd_test(enable = "avx512fp16,avx512vl")]
26594 unsafe fn test_mm256_mask_cvtph_epu64() {
26595 let src = _mm256_set_epi64x(5, 6, 7, 8);
26596 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26597 let r = _mm256_mask_cvtph_epu64(src, 0b0101, a);
26598 let e = _mm256_set_epi64x(5, 2, 7, 4);
26599 assert_eq_m256i(r, e);
26600 }
26601
26602 #[simd_test(enable = "avx512fp16,avx512vl")]
26603 unsafe fn test_mm256_maskz_cvtph_epu64() {
26604 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26605 let r = _mm256_maskz_cvtph_epu64(0b0101, a);
26606 let e = _mm256_set_epi64x(0, 2, 0, 4);
26607 assert_eq_m256i(r, e);
26608 }
26609
26610 #[simd_test(enable = "avx512fp16")]
26611 unsafe fn test_mm512_cvtph_epu64() {
26612 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26613 let r = _mm512_cvtph_epu64(a);
26614 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26615 assert_eq_m512i(r, e);
26616 }
26617
26618 #[simd_test(enable = "avx512fp16")]
26619 unsafe fn test_mm512_mask_cvtph_epu64() {
26620 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26621 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26622 let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a);
26623 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26624 assert_eq_m512i(r, e);
26625 }
26626
26627 #[simd_test(enable = "avx512fp16")]
26628 unsafe fn test_mm512_maskz_cvtph_epu64() {
26629 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26630 let r = _mm512_maskz_cvtph_epu64(0b01010101, a);
26631 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26632 assert_eq_m512i(r, e);
26633 }
26634
26635 #[simd_test(enable = "avx512fp16")]
26636 unsafe fn test_mm512_cvt_roundph_epu64() {
26637 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26638 let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26639 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26640 assert_eq_m512i(r, e);
26641 }
26642
26643 #[simd_test(enable = "avx512fp16")]
26644 unsafe fn test_mm512_mask_cvt_roundph_epu64() {
26645 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26646 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26647 let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26648 src, 0b01010101, a,
26649 );
26650 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26651 assert_eq_m512i(r, e);
26652 }
26653
26654 #[simd_test(enable = "avx512fp16")]
26655 unsafe fn test_mm512_maskz_cvt_roundph_epu64() {
26656 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26657 let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26658 0b01010101, a,
26659 );
26660 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26661 assert_eq_m512i(r, e);
26662 }
26663
26664 #[simd_test(enable = "avx512fp16,avx512vl")]
26665 unsafe fn test_mm_cvttph_epi64() {
26666 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26667 let r = _mm_cvttph_epi64(a);
26668 let e = _mm_set_epi64x(1, 2);
26669 assert_eq_m128i(r, e);
26670 }
26671
26672 #[simd_test(enable = "avx512fp16,avx512vl")]
26673 unsafe fn test_mm_mask_cvttph_epi64() {
26674 let src = _mm_set_epi64x(3, 4);
26675 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26676 let r = _mm_mask_cvttph_epi64(src, 0b01, a);
26677 let e = _mm_set_epi64x(3, 2);
26678 assert_eq_m128i(r, e);
26679 }
26680
26681 #[simd_test(enable = "avx512fp16,avx512vl")]
26682 unsafe fn test_mm_maskz_cvttph_epi64() {
26683 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26684 let r = _mm_maskz_cvttph_epi64(0b01, a);
26685 let e = _mm_set_epi64x(0, 2);
26686 assert_eq_m128i(r, e);
26687 }
26688
26689 #[simd_test(enable = "avx512fp16,avx512vl")]
26690 unsafe fn test_mm256_cvttph_epi64() {
26691 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26692 let r = _mm256_cvttph_epi64(a);
26693 let e = _mm256_set_epi64x(1, 2, 3, 4);
26694 assert_eq_m256i(r, e);
26695 }
26696
26697 #[simd_test(enable = "avx512fp16,avx512vl")]
26698 unsafe fn test_mm256_mask_cvttph_epi64() {
26699 let src = _mm256_set_epi64x(5, 6, 7, 8);
26700 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26701 let r = _mm256_mask_cvttph_epi64(src, 0b0101, a);
26702 let e = _mm256_set_epi64x(5, 2, 7, 4);
26703 assert_eq_m256i(r, e);
26704 }
26705
26706 #[simd_test(enable = "avx512fp16,avx512vl")]
26707 unsafe fn test_mm256_maskz_cvttph_epi64() {
26708 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26709 let r = _mm256_maskz_cvttph_epi64(0b0101, a);
26710 let e = _mm256_set_epi64x(0, 2, 0, 4);
26711 assert_eq_m256i(r, e);
26712 }
26713
26714 #[simd_test(enable = "avx512fp16")]
26715 unsafe fn test_mm512_cvttph_epi64() {
26716 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26717 let r = _mm512_cvttph_epi64(a);
26718 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26719 assert_eq_m512i(r, e);
26720 }
26721
26722 #[simd_test(enable = "avx512fp16")]
26723 unsafe fn test_mm512_mask_cvttph_epi64() {
26724 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26725 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26726 let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a);
26727 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26728 assert_eq_m512i(r, e);
26729 }
26730
26731 #[simd_test(enable = "avx512fp16")]
26732 unsafe fn test_mm512_maskz_cvttph_epi64() {
26733 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26734 let r = _mm512_maskz_cvttph_epi64(0b01010101, a);
26735 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26736 assert_eq_m512i(r, e);
26737 }
26738
26739 #[simd_test(enable = "avx512fp16")]
26740 unsafe fn test_mm512_cvtt_roundph_epi64() {
26741 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26742 let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
26743 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26744 assert_eq_m512i(r, e);
26745 }
26746
26747 #[simd_test(enable = "avx512fp16")]
26748 unsafe fn test_mm512_mask_cvtt_roundph_epi64() {
26749 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26750 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26751 let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
26752 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26753 assert_eq_m512i(r, e);
26754 }
26755
26756 #[simd_test(enable = "avx512fp16")]
26757 unsafe fn test_mm512_maskz_cvtt_roundph_epi64() {
26758 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26759 let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a);
26760 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26761 assert_eq_m512i(r, e);
26762 }
26763
26764 #[simd_test(enable = "avx512fp16,avx512vl")]
26765 unsafe fn test_mm_cvttph_epu64() {
26766 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26767 let r = _mm_cvttph_epu64(a);
26768 let e = _mm_set_epi64x(1, 2);
26769 assert_eq_m128i(r, e);
26770 }
26771
26772 #[simd_test(enable = "avx512fp16,avx512vl")]
26773 unsafe fn test_mm_mask_cvttph_epu64() {
26774 let src = _mm_set_epi64x(3, 4);
26775 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26776 let r = _mm_mask_cvttph_epu64(src, 0b01, a);
26777 let e = _mm_set_epi64x(3, 2);
26778 assert_eq_m128i(r, e);
26779 }
26780
26781 #[simd_test(enable = "avx512fp16,avx512vl")]
26782 unsafe fn test_mm_maskz_cvttph_epu64() {
26783 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
26784 let r = _mm_maskz_cvttph_epu64(0b01, a);
26785 let e = _mm_set_epi64x(0, 2);
26786 assert_eq_m128i(r, e);
26787 }
26788
26789 #[simd_test(enable = "avx512fp16,avx512vl")]
26790 unsafe fn test_mm256_cvttph_epu64() {
26791 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26792 let r = _mm256_cvttph_epu64(a);
26793 let e = _mm256_set_epi64x(1, 2, 3, 4);
26794 assert_eq_m256i(r, e);
26795 }
26796
26797 #[simd_test(enable = "avx512fp16,avx512vl")]
26798 unsafe fn test_mm256_mask_cvttph_epu64() {
26799 let src = _mm256_set_epi64x(5, 6, 7, 8);
26800 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26801 let r = _mm256_mask_cvttph_epu64(src, 0b0101, a);
26802 let e = _mm256_set_epi64x(5, 2, 7, 4);
26803 assert_eq_m256i(r, e);
26804 }
26805
26806 #[simd_test(enable = "avx512fp16,avx512vl")]
26807 unsafe fn test_mm256_maskz_cvttph_epu64() {
26808 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26809 let r = _mm256_maskz_cvttph_epu64(0b0101, a);
26810 let e = _mm256_set_epi64x(0, 2, 0, 4);
26811 assert_eq_m256i(r, e);
26812 }
26813
26814 #[simd_test(enable = "avx512fp16")]
26815 unsafe fn test_mm512_cvttph_epu64() {
26816 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26817 let r = _mm512_cvttph_epu64(a);
26818 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26819 assert_eq_m512i(r, e);
26820 }
26821
26822 #[simd_test(enable = "avx512fp16")]
26823 unsafe fn test_mm512_mask_cvttph_epu64() {
26824 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26825 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26826 let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a);
26827 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26828 assert_eq_m512i(r, e);
26829 }
26830
26831 #[simd_test(enable = "avx512fp16")]
26832 unsafe fn test_mm512_maskz_cvttph_epu64() {
26833 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26834 let r = _mm512_maskz_cvttph_epu64(0b01010101, a);
26835 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26836 assert_eq_m512i(r, e);
26837 }
26838
26839 #[simd_test(enable = "avx512fp16")]
26840 unsafe fn test_mm512_cvtt_roundph_epu64() {
26841 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26842 let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
26843 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
26844 assert_eq_m512i(r, e);
26845 }
26846
26847 #[simd_test(enable = "avx512fp16")]
26848 unsafe fn test_mm512_mask_cvtt_roundph_epu64() {
26849 let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
26850 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26851 let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
26852 let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
26853 assert_eq_m512i(r, e);
26854 }
26855
26856 #[simd_test(enable = "avx512fp16")]
26857 unsafe fn test_mm512_maskz_cvtt_roundph_epu64() {
26858 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26859 let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a);
26860 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26861 assert_eq_m512i(r, e);
26862 }
26863
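// `cvtxph_ps`: widening f16 -> f32 conversions. All values used here are exactly representable
// in both formats, so exact floating-point equality is a valid check.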
26864 #[simd_test(enable = "avx512fp16,avx512vl")]
26865 unsafe fn test_mm_cvtxph_ps() {
26866 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26867 let r = _mm_cvtxph_ps(a);
26868 let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
26869 assert_eq_m128(r, e);
26870 }
26871
26872 #[simd_test(enable = "avx512fp16,avx512vl")]
26873 unsafe fn test_mm_mask_cvtxph_ps() {
26874 let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0);
26875 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26876 let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
26877 let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0);
26878 assert_eq_m128(r, e);
26879 }
26880
26881 #[simd_test(enable = "avx512fp16,avx512vl")]
26882 unsafe fn test_mm_maskz_cvtxph_ps() {
26883 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26884 let r = _mm_maskz_cvtxph_ps(0b0101, a);
26885 let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0);
26886 assert_eq_m128(r, e);
26887 }
26888
26889 #[simd_test(enable = "avx512fp16,avx512vl")]
26890 unsafe fn test_mm256_cvtxph_ps() {
26891 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26892 let r = _mm256_cvtxph_ps(a);
26893 let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26894 assert_eq_m256(r, e);
26895 }
26896
26897 #[simd_test(enable = "avx512fp16,avx512vl")]
26898 unsafe fn test_mm256_mask_cvtxph_ps() {
26899 let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
26900 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26901 let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a);
26902 let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
26903 assert_eq_m256(r, e);
26904 }
26905
26906 #[simd_test(enable = "avx512fp16,avx512vl")]
26907 unsafe fn test_mm256_maskz_cvtxph_ps() {
26908 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26909 let r = _mm256_maskz_cvtxph_ps(0b01010101, a);
26910 let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
26911 assert_eq_m256(r, e);
26912 }
26913
26914 #[simd_test(enable = "avx512fp16")]
26915 unsafe fn test_mm512_cvtxph_ps() {
26916 let a = _mm256_set_ph(
26917 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26918 );
26919 let r = _mm512_cvtxph_ps(a);
26920 let e = _mm512_set_ps(
26921 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26922 );
26923 assert_eq_m512(r, e);
26924 }
26925
26926 #[simd_test(enable = "avx512fp16")]
26927 unsafe fn test_mm512_mask_cvtxph_ps() {
26928 let src = _mm512_set_ps(
26929 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
26930 24.0, 25.0,
26931 );
26932 let a = _mm256_set_ph(
26933 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26934 );
26935 let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a);
26936 let e = _mm512_set_ps(
26937 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
26938 16.0,
26939 );
26940 assert_eq_m512(r, e);
26941 }
26942
26943 #[simd_test(enable = "avx512fp16")]
26944 unsafe fn test_mm512_maskz_cvtxph_ps() {
26945 let a = _mm256_set_ph(
26946 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26947 );
26948 let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a);
26949 let e = _mm512_set_ps(
26950 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
26951 );
26952 assert_eq_m512(r, e);
26953 }
26954
26955 #[simd_test(enable = "avx512fp16")]
26956 unsafe fn test_mm512_cvtx_roundph_ps() {
26957 let a = _mm256_set_ph(
26958 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26959 );
26960 let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
26961 let e = _mm512_set_ps(
26962 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26963 );
26964 assert_eq_m512(r, e);
26965 }
26966
26967 #[simd_test(enable = "avx512fp16")]
26968 unsafe fn test_mm512_mask_cvtx_roundph_ps() {
26969 let src = _mm512_set_ps(
26970 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
26971 24.0, 25.0,
26972 );
26973 let a = _mm256_set_ph(
26974 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26975 );
26976 let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
26977 let e = _mm512_set_ps(
26978 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
26979 16.0,
26980 );
26981 assert_eq_m512(r, e);
26982 }
26983
26984 #[simd_test(enable = "avx512fp16")]
26985 unsafe fn test_mm512_maskz_cvtx_roundph_ps() {
26986 let a = _mm256_set_ph(
26987 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26988 );
26989 let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
26990 let e = _mm512_set_ps(
26991 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
26992 );
26993 assert_eq_m512(r, e);
26994 }
26995
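// Scalar f16 -> f32 conversion: only lane 0 of `b` is converted; the upper lanes of the result
// are copied from `a`, and the single mask bit chooses between the converted value and `src`
// (or zero) for lane 0 only.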
26996 #[simd_test(enable = "avx512fp16")]
26997 unsafe fn test_mm_cvtsh_ss() {
26998 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
26999 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27000 let r = _mm_cvtsh_ss(a, b);
27001 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27002 assert_eq_m128(r, e);
27003 }
27004
27005 #[simd_test(enable = "avx512fp16")]
27006 unsafe fn test_mm_mask_cvtsh_ss() {
27007 let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
27008 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27009 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27010 let r = _mm_mask_cvtsh_ss(src, 0, a, b);
27011 let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
27012 assert_eq_m128(r, e);
27013 let r = _mm_mask_cvtsh_ss(src, 1, a, b);
27014 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27015 assert_eq_m128(r, e);
27016 }
27017
27018 #[simd_test(enable = "avx512fp16")]
27019 unsafe fn test_mm_maskz_cvtsh_ss() {
27020 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27021 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27022 let r = _mm_maskz_cvtsh_ss(0, a, b);
27023 let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
27024 assert_eq_m128(r, e);
27025 let r = _mm_maskz_cvtsh_ss(1, a, b);
27026 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27027 assert_eq_m128(r, e);
27028 }
27029
27030 #[simd_test(enable = "avx512fp16")]
27031 unsafe fn test_mm_cvt_roundsh_ss() {
27032 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27033 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27034 let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
27035 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27036 assert_eq_m128(r, e);
27037 }
27038
27039 #[simd_test(enable = "avx512fp16")]
27040 unsafe fn test_mm_mask_cvt_roundsh_ss() {
27041 let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
27042 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27043 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27044 let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b);
27045 let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
27046 assert_eq_m128(r, e);
27047 let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b);
27048 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27049 assert_eq_m128(r, e);
27050 }
27051
27052 #[simd_test(enable = "avx512fp16")]
27053 unsafe fn test_mm_maskz_cvt_roundsh_ss() {
27054 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27055 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27056 let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b);
27057 let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
27058 assert_eq_m128(r, e);
27059 let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b);
27060 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27061 assert_eq_m128(r, e);
27062 }
27063
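// f16 -> f64 conversions follow the same lane layout and masking rules as the f16 -> f32 tests
// above, just with half as many lanes per vector width.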
27064 #[simd_test(enable = "avx512fp16,avx512vl")]
27065 unsafe fn test_mm_cvtph_pd() {
27066 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27067 let r = _mm_cvtph_pd(a);
27068 let e = _mm_set_pd(1.0, 2.0);
27069 assert_eq_m128d(r, e);
27070 }
27071
27072 #[simd_test(enable = "avx512fp16,avx512vl")]
27073 unsafe fn test_mm_mask_cvtph_pd() {
27074 let src = _mm_set_pd(10.0, 11.0);
27075 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27076 let r = _mm_mask_cvtph_pd(src, 0b01, a);
27077 let e = _mm_set_pd(10.0, 2.0);
27078 assert_eq_m128d(r, e);
27079 }
27080
27081 #[simd_test(enable = "avx512fp16,avx512vl")]
27082 unsafe fn test_mm_maskz_cvtph_pd() {
27083 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27084 let r = _mm_maskz_cvtph_pd(0b01, a);
27085 let e = _mm_set_pd(0.0, 2.0);
27086 assert_eq_m128d(r, e);
27087 }
27088
27089 #[simd_test(enable = "avx512fp16,avx512vl")]
27090 unsafe fn test_mm256_cvtph_pd() {
27091 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27092 let r = _mm256_cvtph_pd(a);
27093 let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
27094 assert_eq_m256d(r, e);
27095 }
27096
27097 #[simd_test(enable = "avx512fp16,avx512vl")]
27098 unsafe fn test_mm256_mask_cvtph_pd() {
27099 let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0);
27100 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27101 let r = _mm256_mask_cvtph_pd(src, 0b0101, a);
27102 let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0);
27103 assert_eq_m256d(r, e);
27104 }
27105
27106 #[simd_test(enable = "avx512fp16,avx512vl")]
27107 unsafe fn test_mm256_maskz_cvtph_pd() {
27108 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27109 let r = _mm256_maskz_cvtph_pd(0b0101, a);
27110 let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0);
27111 assert_eq_m256d(r, e);
27112 }
27113
27114 #[simd_test(enable = "avx512fp16")]
27115 unsafe fn test_mm512_cvtph_pd() {
27116 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27117 let r = _mm512_cvtph_pd(a);
27118 let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27119 assert_eq_m512d(r, e);
27120 }
27121
27122 #[simd_test(enable = "avx512fp16")]
27123 unsafe fn test_mm512_mask_cvtph_pd() {
27124 let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
27125 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27126 let r = _mm512_mask_cvtph_pd(src, 0b01010101, a);
27127 let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
27128 assert_eq_m512d(r, e);
27129 }
27130
27131 #[simd_test(enable = "avx512fp16")]
27132 unsafe fn test_mm512_maskz_cvtph_pd() {
27133 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27134 let r = _mm512_maskz_cvtph_pd(0b01010101, a);
27135 let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
27136 assert_eq_m512d(r, e);
27137 }
27138
27139 #[simd_test(enable = "avx512fp16")]
27140 unsafe fn test_mm512_cvt_roundph_pd() {
27141 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27142 let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
27143 let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27144 assert_eq_m512d(r, e);
27145 }
27146
27147 #[simd_test(enable = "avx512fp16")]
27148 unsafe fn test_mm512_mask_cvt_roundph_pd() {
27149 let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
27150 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27151 let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
27152 let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
27153 assert_eq_m512d(r, e);
27154 }
27155
27156 #[simd_test(enable = "avx512fp16")]
27157 unsafe fn test_mm512_maskz_cvt_roundph_pd() {
27158 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27159 let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a);
27160 let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
27161 assert_eq_m512d(r, e);
27162 }
27163
27164 #[simd_test(enable = "avx512fp16")]
27165 unsafe fn test_mm_cvtsh_sd() {
27166 let a = _mm_setr_pd(2.0, 20.0);
27167 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27168 let r = _mm_cvtsh_sd(a, b);
27169 let e = _mm_setr_pd(1.0, 20.0);
27170 assert_eq_m128d(r, e);
27171 }
27172
27173 #[simd_test(enable = "avx512fp16")]
27174 unsafe fn test_mm_mask_cvtsh_sd() {
27175 let src = _mm_setr_pd(3.0, 11.0);
27176 let a = _mm_setr_pd(2.0, 20.0);
27177 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27178 let r = _mm_mask_cvtsh_sd(src, 0, a, b);
27179 let e = _mm_setr_pd(3.0, 20.0);
27180 assert_eq_m128d(r, e);
27181 let r = _mm_mask_cvtsh_sd(src, 1, a, b);
27182 let e = _mm_setr_pd(1.0, 20.0);
27183 assert_eq_m128d(r, e);
27184 }
27185
27186 #[simd_test(enable = "avx512fp16")]
27187 unsafe fn test_mm_maskz_cvtsh_sd() {
27188 let a = _mm_setr_pd(2.0, 20.0);
27189 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27190 let r = _mm_maskz_cvtsh_sd(0, a, b);
27191 let e = _mm_setr_pd(0.0, 20.0);
27192 assert_eq_m128d(r, e);
27193 let r = _mm_maskz_cvtsh_sd(1, a, b);
27194 let e = _mm_setr_pd(1.0, 20.0);
27195 assert_eq_m128d(r, e);
27196 }
27197
27198 #[simd_test(enable = "avx512fp16")]
27199 unsafe fn test_mm_cvt_roundsh_sd() {
27200 let a = _mm_setr_pd(2.0, 20.0);
27201 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27202 let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b);
27203 let e = _mm_setr_pd(1.0, 20.0);
27204 assert_eq_m128d(r, e);
27205 }
27206
27207 #[simd_test(enable = "avx512fp16")]
27208 unsafe fn test_mm_mask_cvt_roundsh_sd() {
27209 let src = _mm_setr_pd(3.0, 11.0);
27210 let a = _mm_setr_pd(2.0, 20.0);
27211 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27212 let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b);
27213 let e = _mm_setr_pd(3.0, 20.0);
27214 assert_eq_m128d(r, e);
27215 let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b);
27216 let e = _mm_setr_pd(1.0, 20.0);
27217 assert_eq_m128d(r, e);
27218 }
27219
27220 #[simd_test(enable = "avx512fp16")]
27221 unsafe fn test_mm_maskz_cvt_roundsh_sd() {
27222 let a = _mm_setr_pd(2.0, 20.0);
27223 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27224 let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b);
27225 let e = _mm_setr_pd(0.0, 20.0);
27226 assert_eq_m128d(r, e);
27227 let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b);
27228 let e = _mm_setr_pd(1.0, 20.0);
27229 assert_eq_m128d(r, e);
27230 }
27231
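// `_mm*_cvtsh_h` extracts lane 0 as an `f16`; the wider vector types differ only in how many
// upper lanes are ignored.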
27232 #[simd_test(enable = "avx512fp16")]
27233 unsafe fn test_mm_cvtsh_h() {
27234 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27235 let r = _mm_cvtsh_h(a);
27236 assert_eq!(r, 1.0);
27237 }
27238
27239 #[simd_test(enable = "avx512fp16")]
27240 unsafe fn test_mm256_cvtsh_h() {
27241 let a = _mm256_setr_ph(
27242 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27243 );
27244 let r = _mm256_cvtsh_h(a);
27245 assert_eq!(r, 1.0);
27246 }
27247
27248 #[simd_test(enable = "avx512fp16")]
27249 unsafe fn test_mm512_cvtsh_h() {
27250 let a = _mm512_setr_ph(
27251 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27252 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
27253 31.0, 32.0,
27254 );
27255 let r = _mm512_cvtsh_h(a);
27256 assert_eq!(r, 1.0);
27257 }
27258
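// Bit moves between a 16-bit integer and lane 0 of a 128-bit vector: `_mm_cvtsi128_si16` reads
// the low 16 bits, and `_mm_cvtsi16_si128` zero-extends its argument into lane 0.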
27259 #[simd_test(enable = "avx512fp16")]
27260 unsafe fn test_mm_cvtsi128_si16() {
27261 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
27262 let r = _mm_cvtsi128_si16(a);
27263 assert_eq!(r, 1);
27264 }
27265
27266 #[simd_test(enable = "avx512fp16")]
27267 unsafe fn test_mm_cvtsi16_si128() {
27268 let a = 1;
27269 let r = _mm_cvtsi16_si128(a);
27270 let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
27271 assert_eq_m128i(r, e);
27272 }
27273}
27274