use crate::arch::asm;
use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::{fmaf16, simd::*};
use crate::ptr;

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
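///
/// A minimal usage sketch (illustrative, not part of the original docs); it assumes the unstable
/// `f16` type and a target with `avx512fp16` available. The arguments run from the highest lane
/// (`e7`) down to the lowest (`e0`):
///
/// ```ignore
/// let v = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// let mut out = [0.0f16; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
/// assert_eq!(out, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
/// ```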
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set_ph(
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_set_ph(
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_set_ph(
    e31: f16,
    e30: f16,
    e29: f16,
    e28: f16,
    e27: f16,
    e26: f16,
    e25: f16,
    e24: f16,
    e23: f16,
    e22: f16,
    e21: f16,
    e20: f16,
    e19: f16,
    e18: f16,
    e17: f16,
    e16: f16,
    e15: f16,
    e14: f16,
    e13: f16,
    e12: f16,
    e11: f16,
    e10: f16,
    e9: f16,
    e8: f16,
    e7: f16,
    e6: f16,
    e5: f16,
    e4: f16,
    e3: f16,
    e2: f16,
    e1: f16,
    e0: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Copy half-precision (16-bit) floating-point element a to the lower element of dst, and zero
/// the upper 7 elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set_sh(a: f16) -> __m128h {
    __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_set1_ph(a: f16) -> __m128h {
    unsafe { transmute(f16x8::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_set1_ph(a: f16) -> __m256h {
    unsafe { transmute(f16x16::splat(a)) }
}

/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_set1_ph(a: f16) -> __m512h {
    unsafe { transmute(f16x32::splat(a)) }
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
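///
/// A minimal sketch (illustrative, not part of the original docs): `_mm_setr_ph` takes its
/// arguments in memory order, lowest lane first, so it mirrors `_mm_set_ph`:
///
/// ```ignore
/// let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
/// // `a` and `b` hold identical lanes
/// ```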
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
) -> __m128h {
    __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
) -> __m256h {
    __m256h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ])
}

/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_setr_ph(
    e0: f16,
    e1: f16,
    e2: f16,
    e3: f16,
    e4: f16,
    e5: f16,
    e6: f16,
    e7: f16,
    e8: f16,
    e9: f16,
    e10: f16,
    e11: f16,
    e12: f16,
    e13: f16,
    e14: f16,
    e15: f16,
    e16: f16,
    e17: f16,
    e18: f16,
    e19: f16,
    e20: f16,
    e21: f16,
    e22: f16,
    e23: f16,
    e24: f16,
    e25: f16,
    e26: f16,
    e27: f16,
    e28: f16,
    e29: f16,
    e30: f16,
    e31: f16,
) -> __m512h {
    __m512h([
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    ])
}

/// Return vector of type __m128h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_setzero_ph() -> __m128h {
    unsafe { transmute(f16x8::ZERO) }
}

/// Return vector of type __m256h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_setzero_ph() -> __m256h {
    f16x16::ZERO.as_m256h()
}

/// Return vector of type __m512h with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_setzero_ph() -> __m512h {
    f16x32::ZERO.as_m512h()
}

/// Return vector of type `__m128h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_undefined_ph() -> __m128h {
    f16x8::ZERO.as_m128h()
}

/// Return vector of type `__m256h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_undefined_ph() -> __m256h {
    f16x16::ZERO.as_m256h()
}

/// Return vector of type `__m512h` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_undefined_ph() -> __m512h {
    f16x32::ZERO.as_m512h()
}

/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_pd(a: __m128h) -> __m128d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castps_ph(a: __m128) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castps_ph(a: __m256) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castps_ph(a: __m512) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_ps(a: __m128h) -> __m128 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
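///
/// A minimal sketch (illustrative, not part of the original docs): the `cast*` intrinsics only
/// reinterpret bits, so a round trip through `__m128i` and back leaves every lane unchanged:
///
/// ```ignore
/// let v = _mm_set1_ph(1.5);
/// let bits = _mm_castph_si128(v); // reinterpret the 128 bits as integer lanes
/// let back = _mm_castsi128_ph(bits); // the same 128 bits viewed as f16 lanes again
/// ```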
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_castph_si128(a: __m128h) -> __m128i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
    unsafe { transmute(a) }
}

/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
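///
/// A minimal sketch (illustrative, not part of the original docs), showing that the upper half of
/// the result is guaranteed to be zero, unlike `_mm256_castph128_ph256`:
///
/// ```ignore
/// let lo = _mm_set1_ph(3.0);
/// let r = _mm256_zextph128_ph256(lo);
/// // lanes 0..=7 of `r` are 3.0, lanes 8..=15 are 0.0
/// ```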
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}

/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}

/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}

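// `cmp_asm!` below wraps the `vcmpph` comparison in inline assembly. The first arm performs an
// unmasked compare into a mask register; the second arm additionally applies a caller-supplied
// mask register to the result. `IMM5` is the comparison predicate constant captured from the
// calling intrinsic.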
macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
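///
/// A minimal sketch (illustrative, not part of the original docs); the predicate constants such as
/// `_CMP_LT_OS` are the AVX comparison predicates:
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // every lane of `a` is less than the matching lane of `b`
/// let k = _mm_cmp_ph_mask::<_CMP_LT_OS>(a, b);
/// assert_eq!(k, 0xff);
/// ```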
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, k1, xmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask16,
    a: __m256h,
    b: __m256h,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
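///
/// A minimal sketch (illustrative, not part of the original docs), suppressing floating-point
/// exceptions for the comparison:
///
/// ```ignore
/// let a = _mm512_set1_ph(1.0);
/// let b = _mm512_set1_ph(1.0);
/// let k = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
/// // all 32 lanes compare equal, so every bit of `k` is set
/// ```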
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, zmm_reg, a, b)
        }
    }
}

/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            let dst: __mmask32;
            asm!(
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                k1 = in(kreg) k1,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
        }
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    static_assert_sae!(SAE);
    _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(0xff, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcmpsh(a, b, IMM5, k1, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        vcomish(a, b, IMM5, SAE)
    }
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
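///
/// A minimal sketch (illustrative, not part of the original docs):
///
/// ```ignore
/// let a = _mm_set_sh(1.0);
/// let b = _mm_set_sh(2.0);
/// assert_eq!(_mm_comi_sh::<_CMP_LT_OS>(a, b), 1); // 1.0 < 2.0
/// ```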
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
    static_assert_uimm_bits!(IMM5, 5);
    _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_OS>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_GT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LE_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_LT_OQ>(a, b)
}

/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
    _mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
    *mem_addr.cast()
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
    *mem_addr.cast()
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
    *mem_addr.cast()
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
/// and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
    _mm_set_sh(*mem_addr)
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h = src;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}"),
        dst = inout(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h;
    asm!(
        vpl!("vmovsh {dst}{{{k}}}{{z}}"),
        dst = out(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}

/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
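///
/// A minimal sketch (illustrative, not part of the original docs); no alignment is required, unlike
/// `_mm_load_ph`:
///
/// ```ignore
/// let data: [f16; 8] = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
/// let v = unsafe { _mm_loadu_ph(data.as_ptr()) };
/// let mut out = [0.0f16; 8];
/// unsafe { _mm_storeu_ph(out.as_mut_ptr(), v) };
/// assert_eq!(out, data);
/// ```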
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
    ptr::read_unaligned(mem_addr.cast())
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
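///
/// A minimal sketch (illustrative, not part of the original docs):
///
/// ```ignore
/// let src = _mm_set_sh(5.0);
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set_sh(2.0);
/// // mask bit 0 is clear, so lane 0 is taken from `src` (5.0);
/// // lanes 1..=7 are always copied from `a`
/// let r = _mm_mask_move_sh(src, 0, a, b);
/// ```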
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = simd_extract!(src, 0);
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
/// elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mut mov: f16 = 0.;
        if (k & 1) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
    unsafe {
        let mov: f16 = simd_extract!(b, 0);
        simd_insert!(a, 0, mov)
    }
}

/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
    *mem_addr.cast() = a;
}

/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
    *mem_addr.cast() = a;
}

/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
    *mem_addr.cast() = a;
}

/// Store the lower half-precision (16-bit) floating-point element from a into memory.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
    *mem_addr = simd_extract!(a, 0);
}

/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
    asm!(
        vps!("vmovdqu16", "{{{k}}}, {src}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        src = in(xmm_reg) a,
        options(nostack, preserves_flags)
    );
}

/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
    ptr::write_unaligned(mem_addr.cast(), a);
}

/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
    ptr::write_unaligned(mem_addr.cast(), a);
}

/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address does not need to be aligned to any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
    ptr::write_unaligned(mem_addr.cast(), a);
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
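///
/// A minimal sketch (illustrative, not part of the original docs):
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let sum = _mm_add_ph(a, b); // every lane is 3.0
/// ```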
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vaddph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_add(a, b) }
}

/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
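///
/// A minimal sketch (illustrative, not part of the original docs):
///
/// ```ignore
/// let src = _mm_set1_ph(9.0);
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // mask bits 0 and 1 are set: lanes 0-1 become 3.0, lanes 2-7 keep 9.0 from `src`
/// let r = _mm_mask_add_ph(src, 0b0000_0011, a, b);
/// ```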
1351#[inline]
1352#[target_feature(enable = "avx512fp16,avx512vl")]
1353#[cfg_attr(test, assert_instr(vaddph))]
1354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1355pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1356 unsafe {
1357        let r = _mm_add_ph(a, b);
1358        simd_select_bitmask(k, r, src)
1359 }
1360}
1361
1362/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1363/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1364///
1365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
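///
/// An illustrative sketch of the zeromask behavior (marked `ignore` since it is not a
/// doctest), under the same nightly-toolchain and CPU-feature assumptions as above:
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // Lanes 0 and 1 receive `a + b`; every other lane is zeroed.
/// let r = _mm_maskz_add_ph(0b0000_0011, a, b);
/// // r now holds [3.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
/// ```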
1366#[inline]
1367#[target_feature(enable = "avx512fp16,avx512vl")]
1368#[cfg_attr(test, assert_instr(vaddph))]
1369#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1370pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1371 unsafe {
1372        let r = _mm_add_ph(a, b);
1373        simd_select_bitmask(k, r, _mm_setzero_ph())
1374 }
1375}
1376
1377/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1378///
1379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
1380#[inline]
1381#[target_feature(enable = "avx512fp16,avx512vl")]
1382#[cfg_attr(test, assert_instr(vaddph))]
1383#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1384pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
1385    unsafe { simd_add(a, b) }
1386}
1387
1388/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1389/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1390///
1391/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
1392#[inline]
1393#[target_feature(enable = "avx512fp16,avx512vl")]
1394#[cfg_attr(test, assert_instr(vaddph))]
1395#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1396pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1397 unsafe {
1398        let r = _mm256_add_ph(a, b);
1399        simd_select_bitmask(k, r, src)
1400 }
1401}
1402
1403/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1404/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1405///
1406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
1407#[inline]
1408#[target_feature(enable = "avx512fp16,avx512vl")]
1409#[cfg_attr(test, assert_instr(vaddph))]
1410#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1411pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1412 unsafe {
1413        let r = _mm256_add_ph(a, b);
1414        simd_select_bitmask(k, r, _mm256_setzero_ph())
1415 }
1416}
1417
1418/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1419///
1420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
1421#[inline]
1422#[target_feature(enable = "avx512fp16")]
1423#[cfg_attr(test, assert_instr(vaddph))]
1424#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1425pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
1426    unsafe { simd_add(a, b) }
1427}
1428
1429/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1430/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1431///
1432/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
1433#[inline]
1434#[target_feature(enable = "avx512fp16")]
1435#[cfg_attr(test, assert_instr(vaddph))]
1436#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1437pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1438 unsafe {
1439        let r = _mm512_add_ph(a, b);
1440        simd_select_bitmask(k, r, src)
1441 }
1442}
1443
1444/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1445/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1446///
1447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
1448#[inline]
1449#[target_feature(enable = "avx512fp16")]
1450#[cfg_attr(test, assert_instr(vaddph))]
1451#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1452pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1453 unsafe {
1454        let r = _mm512_add_ph(a, b);
1455        simd_select_bitmask(k, r, _mm512_setzero_ph())
1456 }
1457}
1458
1459/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1460/// Rounding is done according to the rounding parameter, which can be one of:
1461///
1462/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1463/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1464/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1465/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1466/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1467///
1468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
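///
/// A minimal illustrative sketch of passing the rounding constant (marked `ignore` since it
/// is not a doctest); it assumes a nightly toolchain with `f16` support, a CPU providing
/// `avx512fp16`, and the `_mm512_set1_ph` broadcast helper defined elsewhere in this module.
///
/// ```ignore
/// let a = _mm512_set1_ph(1.0);
/// let b = _mm512_set1_ph(2.0);
/// // The rounding mode is a const generic, so it must be known at compile time.
/// let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// // Every lane of r holds 3.0.
/// ```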
1469#[inline]
1470#[target_feature(enable = "avx512fp16")]
1471#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1472#[rustc_legacy_const_generics(2)]
1473#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1474pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1475 unsafe {
1476 static_assert_rounding!(ROUNDING);
1477 vaddph(a, b, ROUNDING)
1478 }
1479}
1480
1481/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1482/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1483/// Rounding is done according to the rounding parameter, which can be one of:
1484///
1485/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1486/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1487/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1488/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1489/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1490///
1491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
1492#[inline]
1493#[target_feature(enable = "avx512fp16")]
1494#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1495#[rustc_legacy_const_generics(4)]
1496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1497pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
1498 src: __m512h,
1499 k: __mmask32,
1500 a: __m512h,
1501 b: __m512h,
1502) -> __m512h {
1503 unsafe {
1504 static_assert_rounding!(ROUNDING);
1505        let r = _mm512_add_round_ph::<ROUNDING>(a, b);
1506        simd_select_bitmask(k, r, src)
1507 }
1508}
1509
1510/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1511/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1512/// Rounding is done according to the rounding parameter, which can be one of:
1513///
1514/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1515/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1516/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1517/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1518/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
1519/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
1520#[inline]
1521#[target_feature(enable = "avx512fp16")]
1522#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1523#[rustc_legacy_const_generics(3)]
1524#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1525pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
1526 k: __mmask32,
1527 a: __m512h,
1528 b: __m512h,
1529) -> __m512h {
1530 unsafe {
1531 static_assert_rounding!(ROUNDING);
1532        let r = _mm512_add_round_ph::<ROUNDING>(a, b);
1533        simd_select_bitmask(k, r, _mm512_setzero_ph())
1534 }
1535}
1536
1537/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1538/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1539/// Rounding is done according to the rounding parameter, which can be one of:
1540///
1541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1546///
1547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
1548#[inline]
1549#[target_feature(enable = "avx512fp16")]
1550#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1551#[rustc_legacy_const_generics(2)]
1552#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1553pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1554 static_assert_rounding!(ROUNDING);
1555    _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
1556}
1557
1558/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1559/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1560/// writemask k (the element is copied from src when mask bit 0 is not set).
1561/// Rounding is done according to the rounding parameter, which can be one of:
1562///
1563/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1564/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1565/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1566/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1567/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1568///
1569/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
1570#[inline]
1571#[target_feature(enable = "avx512fp16")]
1572#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1573#[rustc_legacy_const_generics(4)]
1574#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1575pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
1576 src: __m128h,
1577 k: __mmask8,
1578 a: __m128h,
1579 b: __m128h,
1580) -> __m128h {
1581 unsafe {
1582 static_assert_rounding!(ROUNDING);
1583 vaddsh(a, b, src, k, ROUNDING)
1584 }
1585}
1586
1587/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1588/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1589/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1590/// Rounding is done according to the rounding parameter, which can be one of:
1591///
1592/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1593/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1594/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1595/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1596/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1597///
1598/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
1599#[inline]
1600#[target_feature(enable = "avx512fp16")]
1601#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1602#[rustc_legacy_const_generics(3)]
1603#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1604pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1605 static_assert_rounding!(ROUNDING);
1606    _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
1607}
1608
1609/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1610/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1611///
1612/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
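///
/// A minimal illustrative sketch of the scalar (lane 0) behavior (marked `ignore` since it
/// is not a doctest), assuming a nightly toolchain with `f16` support and `avx512fp16`:
///
/// ```ignore
/// let a = _mm_set_ph(7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0);
/// let b = _mm_set_sh(2.0);
/// // Lane 0 holds 1.0 + 2.0; lanes 1 through 7 are copied from `a`.
/// let r = _mm_add_sh(a, b);
/// // r now holds [3.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0]
/// ```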
1613#[inline]
1614#[target_feature(enable = "avx512fp16")]
1615#[cfg_attr(test, assert_instr(vaddsh))]
1616#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1617pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
1618 _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
1619}
1620
1621/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1622/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1623/// writemask k (the element is copied from src when mask bit 0 is not set).
1624///
1625/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
1626#[inline]
1627#[target_feature(enable = "avx512fp16")]
1628#[cfg_attr(test, assert_instr(vaddsh))]
1629#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1630pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1631 _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
1632}
1633
1634/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1635/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1636/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1637///
1638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
1639#[inline]
1640#[target_feature(enable = "avx512fp16")]
1641#[cfg_attr(test, assert_instr(vaddsh))]
1642#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1643pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1644 _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
1645}
1646
1647/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1648///
1649/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
1650#[inline]
1651#[target_feature(enable = "avx512fp16,avx512vl")]
1652#[cfg_attr(test, assert_instr(vsubph))]
1653#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1654pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
1655    unsafe { simd_sub(a, b) }
1656}
1657
1658/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1659/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1660///
1661/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
1662#[inline]
1663#[target_feature(enable = "avx512fp16,avx512vl")]
1664#[cfg_attr(test, assert_instr(vsubph))]
1665#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1666pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1667 unsafe {
1668        let r = _mm_sub_ph(a, b);
1669        simd_select_bitmask(k, r, src)
1670 }
1671}
1672
1673/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1674/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1675///
1676/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
1677#[inline]
1678#[target_feature(enable = "avx512fp16,avx512vl")]
1679#[cfg_attr(test, assert_instr(vsubph))]
1680#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1681pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1682 unsafe {
1683        let r = _mm_sub_ph(a, b);
1684        simd_select_bitmask(k, r, _mm_setzero_ph())
1685 }
1686}
1687
1688/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1689///
1690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
1691#[inline]
1692#[target_feature(enable = "avx512fp16,avx512vl")]
1693#[cfg_attr(test, assert_instr(vsubph))]
1694#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1695pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
1696    unsafe { simd_sub(a, b) }
1697}
1698
1699/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1700/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1701///
1702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
1703#[inline]
1704#[target_feature(enable = "avx512fp16,avx512vl")]
1705#[cfg_attr(test, assert_instr(vsubph))]
1706#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1707pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1708 unsafe {
1709        let r = _mm256_sub_ph(a, b);
1710        simd_select_bitmask(k, r, src)
1711 }
1712}
1713
1714/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1715/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1716///
1717/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
1718#[inline]
1719#[target_feature(enable = "avx512fp16,avx512vl")]
1720#[cfg_attr(test, assert_instr(vsubph))]
1721#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1722pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1723 unsafe {
1724        let r = _mm256_sub_ph(a, b);
1725        simd_select_bitmask(k, r, _mm256_setzero_ph())
1726 }
1727}
1728
1729/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1730///
1731/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
1732#[inline]
1733#[target_feature(enable = "avx512fp16")]
1734#[cfg_attr(test, assert_instr(vsubph))]
1735#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1736pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
1737    unsafe { simd_sub(a, b) }
1738}
1739
1740/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1741/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1742///
1743/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
1744#[inline]
1745#[target_feature(enable = "avx512fp16")]
1746#[cfg_attr(test, assert_instr(vsubph))]
1747#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1748pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1749 unsafe {
1750        let r = _mm512_sub_ph(a, b);
1751        simd_select_bitmask(k, r, src)
1752 }
1753}
1754
1755/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1756/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1757///
1758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
1759#[inline]
1760#[target_feature(enable = "avx512fp16")]
1761#[cfg_attr(test, assert_instr(vsubph))]
1762#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1763pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1764 unsafe {
1765        let r = _mm512_sub_ph(a, b);
1766        simd_select_bitmask(k, r, _mm512_setzero_ph())
1767 }
1768}
1769
1770/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1771/// Rounding is done according to the rounding parameter, which can be one of:
1772///
1773/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1774/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1775/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1776/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1777/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1778///
1779/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
1780#[inline]
1781#[target_feature(enable = "avx512fp16")]
1782#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1783#[rustc_legacy_const_generics(2)]
1784#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1785pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1786 unsafe {
1787 static_assert_rounding!(ROUNDING);
1788 vsubph(a, b, ROUNDING)
1789 }
1790}
1791
1792/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1793/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1794/// Rounding is done according to the rounding parameter, which can be one of:
1795///
1796/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1797/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1798/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1799/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1800/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1801///
1802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
1803#[inline]
1804#[target_feature(enable = "avx512fp16")]
1805#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1806#[rustc_legacy_const_generics(4)]
1807#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1808pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
1809 src: __m512h,
1810 k: __mmask32,
1811 a: __m512h,
1812 b: __m512h,
1813) -> __m512h {
1814 unsafe {
1815 static_assert_rounding!(ROUNDING);
1816        let r = _mm512_sub_round_ph::<ROUNDING>(a, b);
1817        simd_select_bitmask(k, r, src)
1818 }
1819}
1820
1821/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1822/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1823/// Rounding is done according to the rounding parameter, which can be one of:
1824///
1825/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1826/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1827/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1828/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1829/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1830///
1831/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
1832#[inline]
1833#[target_feature(enable = "avx512fp16")]
1834#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1835#[rustc_legacy_const_generics(3)]
1836#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1837pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
1838 k: __mmask32,
1839 a: __m512h,
1840 b: __m512h,
1841) -> __m512h {
1842 unsafe {
1843 static_assert_rounding!(ROUNDING);
1844        let r = _mm512_sub_round_ph::<ROUNDING>(a, b);
1845        simd_select_bitmask(k, r, _mm512_setzero_ph())
1846 }
1847}
1848
1849/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1850/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1851/// Rounding is done according to the rounding parameter, which can be one of:
1852///
1853/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1854/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1855/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1856/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1857/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1858///
1859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
1860#[inline]
1861#[target_feature(enable = "avx512fp16")]
1862#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1863#[rustc_legacy_const_generics(2)]
1864#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1865pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1866 static_assert_rounding!(ROUNDING);
1867    _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
1868}
1869
1870/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1871/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1872/// writemask k (the element is copied from src when mask bit 0 is not set).
1873/// Rounding is done according to the rounding parameter, which can be one of:
1874///
1875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1880///
1881/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
1882#[inline]
1883#[target_feature(enable = "avx512fp16")]
1884#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1885#[rustc_legacy_const_generics(4)]
1886#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1887pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
1888 src: __m128h,
1889 k: __mmask8,
1890 a: __m128h,
1891 b: __m128h,
1892) -> __m128h {
1893 unsafe {
1894 static_assert_rounding!(ROUNDING);
1895 vsubsh(a, b, src, k, ROUNDING)
1896 }
1897}
1898
1899/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1900/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1901/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1902/// Rounding is done according to the rounding parameter, which can be one of:
1903///
1904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1909///
1910/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
1911#[inline]
1912#[target_feature(enable = "avx512fp16")]
1913#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1914#[rustc_legacy_const_generics(3)]
1915#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1916pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1917 static_assert_rounding!(ROUNDING);
1918    _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
1919}
1920
1921/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1922/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1923///
1924/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
1925#[inline]
1926#[target_feature(enable = "avx512fp16")]
1927#[cfg_attr(test, assert_instr(vsubsh))]
1928#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1929pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
1930 _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
1931}
1932
1933/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1934/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1935/// writemask k (the element is copied from src when mask bit 0 is not set).
1936///
1937/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
1938#[inline]
1939#[target_feature(enable = "avx512fp16")]
1940#[cfg_attr(test, assert_instr(vsubsh))]
1941#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1942pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1943 _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
1944}
1945
1946/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1947/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1948/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1949///
1950/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
1951#[inline]
1952#[target_feature(enable = "avx512fp16")]
1953#[cfg_attr(test, assert_instr(vsubsh))]
1954#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1955pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1956 _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
1957}
1958
1959/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1960///
1961/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
1962#[inline]
1963#[target_feature(enable = "avx512fp16,avx512vl")]
1964#[cfg_attr(test, assert_instr(vmulph))]
1965#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1966pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
1967    unsafe { simd_mul(a, b) }
1968}
1969
1970/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1971/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1972///
1973/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
1974#[inline]
1975#[target_feature(enable = "avx512fp16,avx512vl")]
1976#[cfg_attr(test, assert_instr(vmulph))]
1977#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1978pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1979 unsafe {
1980        let r = _mm_mul_ph(a, b);
1981        simd_select_bitmask(k, r, src)
1982 }
1983}
1984
1985/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1986/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1987///
1988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
1989#[inline]
1990#[target_feature(enable = "avx512fp16,avx512vl")]
1991#[cfg_attr(test, assert_instr(vmulph))]
1992#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1993pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1994 unsafe {
1995        let r = _mm_mul_ph(a, b);
1996        simd_select_bitmask(k, r, _mm_setzero_ph())
1997 }
1998}
1999
2000/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2001///
2002/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
2003#[inline]
2004#[target_feature(enable = "avx512fp16,avx512vl")]
2005#[cfg_attr(test, assert_instr(vmulph))]
2006#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2007pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
2008    unsafe { simd_mul(a, b) }
2009}
2010
2011/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2012/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2013///
2014/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
2015#[inline]
2016#[target_feature(enable = "avx512fp16,avx512vl")]
2017#[cfg_attr(test, assert_instr(vmulph))]
2018#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2019pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2020 unsafe {
2021        let r = _mm256_mul_ph(a, b);
2022        simd_select_bitmask(k, r, src)
2023 }
2024}
2025
2026/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2027/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2028///
2029/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
2030#[inline]
2031#[target_feature(enable = "avx512fp16,avx512vl")]
2032#[cfg_attr(test, assert_instr(vmulph))]
2033#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2034pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2035 unsafe {
2036        let r = _mm256_mul_ph(a, b);
2037        simd_select_bitmask(k, r, _mm256_setzero_ph())
2038 }
2039}
2040
2041/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2042///
2043/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
2044#[inline]
2045#[target_feature(enable = "avx512fp16")]
2046#[cfg_attr(test, assert_instr(vmulph))]
2047#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2048pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
2049    unsafe { simd_mul(a, b) }
2050}
2051
2052/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2053/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2054///
2055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
2056#[inline]
2057#[target_feature(enable = "avx512fp16")]
2058#[cfg_attr(test, assert_instr(vmulph))]
2059#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2060pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2061 unsafe {
2062        let r = _mm512_mul_ph(a, b);
2063        simd_select_bitmask(k, r, src)
2064 }
2065}
2066
2067/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2068/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2069///
2070/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
2071#[inline]
2072#[target_feature(enable = "avx512fp16")]
2073#[cfg_attr(test, assert_instr(vmulph))]
2074#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2075pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2076 unsafe {
2077        let r = _mm512_mul_ph(a, b);
2078        simd_select_bitmask(k, r, _mm512_setzero_ph())
2079 }
2080}
2081
2082/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2083/// Rounding is done according to the rounding parameter, which can be one of:
2084///
2085/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2086/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2087/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2088/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2089/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2090///
2091/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
2092#[inline]
2093#[target_feature(enable = "avx512fp16")]
2094#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2095#[rustc_legacy_const_generics(2)]
2096#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2097pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2098 unsafe {
2099 static_assert_rounding!(ROUNDING);
2100 vmulph(a, b, ROUNDING)
2101 }
2102}
2103
2104/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2105/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2106/// Rounding is done according to the rounding parameter, which can be one of:
2107///
2108/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2109/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2110/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2111/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2112/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2113///
2114/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
2115#[inline]
2116#[target_feature(enable = "avx512fp16")]
2117#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2118#[rustc_legacy_const_generics(4)]
2119#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2120pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
2121 src: __m512h,
2122 k: __mmask32,
2123 a: __m512h,
2124 b: __m512h,
2125) -> __m512h {
2126 unsafe {
2127 static_assert_rounding!(ROUNDING);
2128        let r = _mm512_mul_round_ph::<ROUNDING>(a, b);
2129        simd_select_bitmask(k, r, src)
2130 }
2131}
2132
2133/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2134/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2135/// Rounding is done according to the rounding parameter, which can be one of:
2136///
2137/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2138/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2139/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2140/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2141/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2142///
2143/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
2144#[inline]
2145#[target_feature(enable = "avx512fp16")]
2146#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2147#[rustc_legacy_const_generics(3)]
2148#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2149pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
2150 k: __mmask32,
2151 a: __m512h,
2152 b: __m512h,
2153) -> __m512h {
2154 unsafe {
2155 static_assert_rounding!(ROUNDING);
2156        let r = _mm512_mul_round_ph::<ROUNDING>(a, b);
2157        simd_select_bitmask(k, r, _mm512_setzero_ph())
2158 }
2159}
2160
2161/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2162/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2163/// Rounding is done according to the rounding parameter, which can be one of:
2164///
2165/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2166/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2167/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2168/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2169/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2170///
2171/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
2172#[inline]
2173#[target_feature(enable = "avx512fp16")]
2174#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2175#[rustc_legacy_const_generics(2)]
2176#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2177pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2178 static_assert_rounding!(ROUNDING);
2179    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
2180}
2181
2182/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2183/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2184/// writemask k (the element is copied from src when mask bit 0 is not set).
2185/// Rounding is done according to the rounding parameter, which can be one of:
2186///
2187/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2188/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2189/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2190/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2191/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2192///
2193/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
2194#[inline]
2195#[target_feature(enable = "avx512fp16")]
2196#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2197#[rustc_legacy_const_generics(4)]
2198#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2199pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
2200 src: __m128h,
2201 k: __mmask8,
2202 a: __m128h,
2203 b: __m128h,
2204) -> __m128h {
2205 unsafe {
2206 static_assert_rounding!(ROUNDING);
2207 vmulsh(a, b, src, k, ROUNDING)
2208 }
2209}
2210
2211/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2212/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2213/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2214/// Rounding is done according to the rounding parameter, which can be one of:
2215///
2216/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2217/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2218/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2219/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2220/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2221///
2222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
2223#[inline]
2224#[target_feature(enable = "avx512fp16")]
2225#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2226#[rustc_legacy_const_generics(3)]
2227#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2228pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2229 static_assert_rounding!(ROUNDING);
2230    _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2231}
2232
2233/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2234/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2235///
2236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
2237#[inline]
2238#[target_feature(enable = "avx512fp16")]
2239#[cfg_attr(test, assert_instr(vmulsh))]
2240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2241pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
2242 _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
2243}
2244
2245/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2246/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2247/// writemask k (the element is copied from src when mask bit 0 is not set).
2248///
2249/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
2250#[inline]
2251#[target_feature(enable = "avx512fp16")]
2252#[cfg_attr(test, assert_instr(vmulsh))]
2253#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2254pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2255 _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2256}
2257
2258/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2259/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2260/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2261///
2262/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
2263#[inline]
2264#[target_feature(enable = "avx512fp16")]
2265#[cfg_attr(test, assert_instr(vmulsh))]
2266#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2267pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2268 _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
2269}
2270
2271/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2272///
2273/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
2274#[inline]
2275#[target_feature(enable = "avx512fp16,avx512vl")]
2276#[cfg_attr(test, assert_instr(vdivph))]
2277#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2278pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
2279    unsafe { simd_div(a, b) }
2280}
2281
2282/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2283/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2284///
2285/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
2286#[inline]
2287#[target_feature(enable = "avx512fp16,avx512vl")]
2288#[cfg_attr(test, assert_instr(vdivph))]
2289#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2290pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2291 unsafe {
2292        let r = _mm_div_ph(a, b);
2293        simd_select_bitmask(k, r, src)
2294 }
2295}
2296
2297/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2298/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2299///
2300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
2301#[inline]
2302#[target_feature(enable = "avx512fp16,avx512vl")]
2303#[cfg_attr(test, assert_instr(vdivph))]
2304#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2305pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2306 unsafe {
2307        let r = _mm_div_ph(a, b);
2308        simd_select_bitmask(k, r, _mm_setzero_ph())
2309 }
2310}
2311
2312/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2313///
2314/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
2315#[inline]
2316#[target_feature(enable = "avx512fp16,avx512vl")]
2317#[cfg_attr(test, assert_instr(vdivph))]
2318#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2319pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
2320    unsafe { simd_div(a, b) }
2321}
2322
2323/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2324/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2325///
2326/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
2327#[inline]
2328#[target_feature(enable = "avx512fp16,avx512vl")]
2329#[cfg_attr(test, assert_instr(vdivph))]
2330#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2331pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2332 unsafe {
2333        let r = _mm256_div_ph(a, b);
2334        simd_select_bitmask(k, r, src)
2335 }
2336}
2337
2338/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2339/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2340///
2341/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
2342#[inline]
2343#[target_feature(enable = "avx512fp16,avx512vl")]
2344#[cfg_attr(test, assert_instr(vdivph))]
2345#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2346pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2347 unsafe {
2348        let r = _mm256_div_ph(a, b);
2349        simd_select_bitmask(k, r, _mm256_setzero_ph())
2350 }
2351}
2352
2353/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2354///
2355/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
2356#[inline]
2357#[target_feature(enable = "avx512fp16")]
2358#[cfg_attr(test, assert_instr(vdivph))]
2359#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2360pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
2361    unsafe { simd_div(a, b) }
2362}
2363
2364/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2365/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2366///
2367/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
2368#[inline]
2369#[target_feature(enable = "avx512fp16")]
2370#[cfg_attr(test, assert_instr(vdivph))]
2371#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2372pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2373 unsafe {
2374        let r = _mm512_div_ph(a, b);
2375        simd_select_bitmask(k, r, src)
2376 }
2377}
2378
2379/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2380/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2381///
2382/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
2383#[inline]
2384#[target_feature(enable = "avx512fp16")]
2385#[cfg_attr(test, assert_instr(vdivph))]
2386#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2387pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2388 unsafe {
2389        let r = _mm512_div_ph(a, b);
2390        simd_select_bitmask(k, r, _mm512_setzero_ph())
2391 }
2392}
2393
2394/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2395/// Rounding is done according to the rounding parameter, which can be one of:
2396///
2397/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2398/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2399/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2400/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2401/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2402///
2403/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
2404#[inline]
2405#[target_feature(enable = "avx512fp16")]
2406#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2407#[rustc_legacy_const_generics(2)]
2408#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2409pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2410 unsafe {
2411 static_assert_rounding!(ROUNDING);
2412 vdivph(a, b, ROUNDING)
2413 }
2414}
2415
2416/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2417/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2418/// Rounding is done according to the rounding parameter, which can be one of:
2419///
2420/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2421/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2422/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2423/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2424/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2425///
2426/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
2427#[inline]
2428#[target_feature(enable = "avx512fp16")]
2429#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2430#[rustc_legacy_const_generics(4)]
2431#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2432pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
2433 src: __m512h,
2434 k: __mmask32,
2435 a: __m512h,
2436 b: __m512h,
2437) -> __m512h {
2438 unsafe {
2439 static_assert_rounding!(ROUNDING);
2440        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
2441        simd_select_bitmask(k, r, src)
2442 }
2443}
2444
2445/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2446/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2447/// Rounding is done according to the rounding parameter, which can be one of:
2448///
2449/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2450/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2451/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2452/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2453/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2454///
2455/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
2456#[inline]
2457#[target_feature(enable = "avx512fp16")]
2458#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2459#[rustc_legacy_const_generics(3)]
2460#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2461pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
2462 k: __mmask32,
2463 a: __m512h,
2464 b: __m512h,
2465) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = _mm512_div_round_ph::<ROUNDING>(a, b);
        simd_select_bitmask(k, r, _mm512_setzero_ph())
    }
2471}
2472
2473/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2474/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2475/// Rounding is done according to the rounding parameter, which can be one of:
2476///
2477/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2478/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2479/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2480/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2481/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2482///
2483/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
2484#[inline]
2485#[target_feature(enable = "avx512fp16")]
2486#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2487#[rustc_legacy_const_generics(2)]
2488#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
}
2493
2494/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2495/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2496/// writemask k (the element is copied from src when mask bit 0 is not set).
2497/// Rounding is done according to the rounding parameter, which can be one of:
2498///
2499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2504///
2505/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
2506#[inline]
2507#[target_feature(enable = "avx512fp16")]
2508#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2509#[rustc_legacy_const_generics(4)]
2510#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2511pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
2512 src: __m128h,
2513 k: __mmask8,
2514 a: __m128h,
2515 b: __m128h,
2516) -> __m128h {
2517 unsafe {
2518 static_assert_rounding!(ROUNDING);
2519 vdivsh(a, b, src, k, ROUNDING)
2520 }
2521}
2522
2523/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2524/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2525/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2526/// Rounding is done according to the rounding parameter, which can be one of:
2527///
2528/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2529/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2530/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2531/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2532/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2533///
2534/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
2535#[inline]
2536#[target_feature(enable = "avx512fp16")]
2537#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2538#[rustc_legacy_const_generics(3)]
2539#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
}
2544
2545/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2546/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2547///
2548/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
2549#[inline]
2550#[target_feature(enable = "avx512fp16")]
2551#[cfg_attr(test, assert_instr(vdivsh))]
2552#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2553pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
2554 _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b)
2555}
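
// Illustrative sketch (not part of the original source): only lane 0 is divided; the
// remaining seven lanes of the result are taken from `a`.
//
//     let a = _mm_set_ph(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 8.0); // lane 0 is 8.0
//     let b = _mm_set_sh(2.0);
//     // r = [4.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]: lane 0 is 8.0 / 2.0, the rest is `a`.
//     let r = _mm_div_sh(a, b);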
2556
2557/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2558/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2559/// writemask k (the element is copied from src when mask bit 0 is not set).
2560///
2561/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
2562#[inline]
2563#[target_feature(enable = "avx512fp16")]
2564#[cfg_attr(test, assert_instr(vdivsh))]
2565#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2566pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2567 _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2568}
2569
2570/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2571/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2572/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2573///
2574/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
2575#[inline]
2576#[target_feature(enable = "avx512fp16")]
2577#[cfg_attr(test, assert_instr(vdivsh))]
2578#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2579pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2580 _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b)
2581}
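
// Illustrative sketch (not part of the original source): only bit 0 of `k` matters for the
// scalar forms. When it is clear, lane 0 of the result is 0.0 (or the lane from `src` with
// _mm_mask_div_sh); lanes 1..7 are still copied from `a` either way.
//
//     let r = _mm_maskz_div_sh(0, a, b); // lane 0 == 0.0, lanes 1..7 == a[1..7]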
2582
2583/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2584/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2585/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2586///
2587/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
2588#[inline]
2589#[target_feature(enable = "avx512fp16,avx512vl")]
2590#[cfg_attr(test, assert_instr(vfmulcph))]
2591#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
}
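
// Worked example (for illustration only, not in the original source): a 128-bit operand
// holds four complex values; for each pair of lanes (re, im),
//
//     // (a_re + i*a_im) * (b_re + i*b_im)
//     //   = (a_re*b_re - a_im*b_im) + i*(a_re*b_im + a_im*b_re)
//     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0); // 1 + 2i in lanes 0..1
//     let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0); // 3 + 4i in lanes 0..1
//     // Lanes 0..1 of r hold (-5.0, 10.0), i.e. (1 + 2i) * (3 + 4i) = -5 + 10i.
//     let r = _mm_mul_pch(a, b);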
2595
2596/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2597/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2598/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2599///
2600/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
2601#[inline]
2602#[target_feature(enable = "avx512fp16,avx512vl")]
2603#[cfg_attr(test, assert_instr(vfmulcph))]
2604#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
}
2608
2609/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2610/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2611/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2612///
2613/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
2614#[inline]
2615#[target_feature(enable = "avx512fp16,avx512vl")]
2616#[cfg_attr(test, assert_instr(vfmulcph))]
2617#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
}
2621
2622/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2623/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2624/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2625///
2626/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
2627#[inline]
2628#[target_feature(enable = "avx512fp16,avx512vl")]
2629#[cfg_attr(test, assert_instr(vfmulcph))]
2630#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
}
2634
2635/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2636/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2637/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2638///
2639/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
2640#[inline]
2641#[target_feature(enable = "avx512fp16,avx512vl")]
2642#[cfg_attr(test, assert_instr(vfmulcph))]
2643#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
}
2647
2648/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2649/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2650/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2651///
2652/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
2653#[inline]
2654#[target_feature(enable = "avx512fp16,avx512vl")]
2655#[cfg_attr(test, assert_instr(vfmulcph))]
2656#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
}
2660
2661/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2662/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2663/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2664///
2665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
2666#[inline]
2667#[target_feature(enable = "avx512fp16")]
2668#[cfg_attr(test, assert_instr(vfmulcph))]
2669#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
}
2673
2674/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2675/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2676/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2677///
2678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
2679#[inline]
2680#[target_feature(enable = "avx512fp16")]
2681#[cfg_attr(test, assert_instr(vfmulcph))]
2682#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2683pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
2684 _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2685}
2686
2687/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2688/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2689/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2690///
2691/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
2692#[inline]
2693#[target_feature(enable = "avx512fp16")]
2694#[cfg_attr(test, assert_instr(vfmulcph))]
2695#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
}
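
// Note (for illustration only, not in the original source): for the complex forms the mask
// is per complex number rather than per f16 lane, so a __m512h holds 16 complex values and
// takes a __mmask16. Given two `__m512h` values `a` and `b`:
//
//     // Keep only the first four complex products; the remaining twelve pairs become 0.0.
//     let r = _mm512_maskz_mul_pch(0x000f, a, b);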
2699
2700/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is
2701/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2702/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2703///
2704/// Rounding is done according to the rounding parameter, which can be one of:
2705///
2706/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2707/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2708/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2709/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2710/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2711///
2712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
2713#[inline]
2714#[target_feature(enable = "avx512fp16")]
2715#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2716#[rustc_legacy_const_generics(2)]
2717#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
}
2722
2723/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
2724/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2725/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2726///
2727/// Rounding is done according to the rounding parameter, which can be one of:
2728///
2729/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2730/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2731/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2732/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2733/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2734///
2735/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
2736#[inline]
2737#[target_feature(enable = "avx512fp16")]
2738#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2739#[rustc_legacy_const_generics(4)]
2740#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2741pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
2742 src: __m512h,
2743 k: __mmask16,
2744 a: __m512h,
2745 b: __m512h,
2746) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
    }
2757}
2758
2759/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2760/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2761/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2762///
2763/// Rounding is done according to the rounding parameter, which can be one of:
2764///
2765/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2766/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2767/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2768/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2770///
2771/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
2772#[inline]
2773#[target_feature(enable = "avx512fp16")]
2774#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2775#[rustc_legacy_const_generics(3)]
2776#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2777pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
2778 k: __mmask16,
2779 a: __m512h,
2780 b: __m512h,
2781) -> __m512h {
2782 static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
2784}
2785
2786/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2787/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2788/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2789/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2790///
2791/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
2792#[inline]
2793#[target_feature(enable = "avx512fp16")]
2794#[cfg_attr(test, assert_instr(vfmulcsh))]
2795#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
}
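
// Illustrative sketch (not part of the original source): only the lowest complex value
// (lanes 0..1) is multiplied; lanes 2..7 of the result are copied from `a`.
//
//     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0); // 1 + 2i
//     let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0); // 3 + 4i
//     // Lanes 0..1 of r hold (-5.0, 10.0); lanes 2..7 are taken from `a` (all 0.0 here).
//     let r = _mm_mul_sch(a, b);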
2799
2800/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2801/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2802/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent
2803/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2804///
2805/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
2806#[inline]
2807#[target_feature(enable = "avx512fp16")]
2808#[cfg_attr(test, assert_instr(vfmulcsh))]
2809#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2810pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2811 _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
2812}
2813
2814/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2815/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2816/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2817/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2818///
2819/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
2820#[inline]
2821#[target_feature(enable = "avx512fp16")]
2822#[cfg_attr(test, assert_instr(vfmulcsh))]
2823#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), k, a, b)
}
2827
2828/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2829/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2830/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2831/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2832///
2833/// Rounding is done according to the rounding parameter, which can be one of:
2834///
2835/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2836/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2837/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2838/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2839/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2840///
2841/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
2842#[inline]
2843#[target_feature(enable = "avx512fp16")]
2844#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2845#[rustc_legacy_const_generics(2)]
2846#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
}
2851
2852/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2853/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2854/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2855/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2856///
2857/// Rounding is done according to the rounding parameter, which can be one of:
2858///
2859/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2860/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2861/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2862/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2863/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2864///
2865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
2866#[inline]
2867#[target_feature(enable = "avx512fp16")]
2868#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2869#[rustc_legacy_const_generics(4)]
2870#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2871pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
2872 src: __m128h,
2873 k: __mmask8,
2874 a: __m128h,
2875 b: __m128h,
2876) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmulcsh(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
    }
2887}
2888
2889/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2890/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2891/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2892/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2893///
2894/// Rounding is done according to the rounding parameter, which can be one of:
2895///
2896/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2897/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2898/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2899/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2900/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2901///
2902/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
2903#[inline]
2904#[target_feature(enable = "avx512fp16")]
2905#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2906#[rustc_legacy_const_generics(3)]
2907#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2908pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
2909 k: __mmask8,
2910 a: __m128h,
2911 b: __m128h,
2912) -> __m128h {
2913 static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
2915}
2916
2917/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2918/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2919/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2920///
2921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
2922#[inline]
2923#[target_feature(enable = "avx512fp16,avx512vl")]
2924#[cfg_attr(test, assert_instr(vfmulcph))]
2925#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2926pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
2927 _mm_mul_pch(a, b)
2928}
2929
2930/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2931/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2932/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2933///
2934/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
2935#[inline]
2936#[target_feature(enable = "avx512fp16,avx512vl")]
2937#[cfg_attr(test, assert_instr(vfmulcph))]
2938#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2939pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2940 _mm_mask_mul_pch(src, k, a, b)
2941}
2942
2943/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2944/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2945/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2946///
2947/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
2948#[inline]
2949#[target_feature(enable = "avx512fp16,avx512vl")]
2950#[cfg_attr(test, assert_instr(vfmulcph))]
2951#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2952pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2953 _mm_maskz_mul_pch(k, a, b)
2954}
2955
2956/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2957/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2958/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2959///
2960/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
2961#[inline]
2962#[target_feature(enable = "avx512fp16,avx512vl")]
2963#[cfg_attr(test, assert_instr(vfmulcph))]
2964#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2965pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
2966 _mm256_mul_pch(a, b)
2967}
2968
2969/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2970/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2971/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2972///
2973/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
2974#[inline]
2975#[target_feature(enable = "avx512fp16,avx512vl")]
2976#[cfg_attr(test, assert_instr(vfmulcph))]
2977#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2978pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2979 _mm256_mask_mul_pch(src, k, a, b)
2980}
2981
2982/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2983/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
2984/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2985///
2986/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
2987#[inline]
2988#[target_feature(enable = "avx512fp16,avx512vl")]
2989#[cfg_attr(test, assert_instr(vfmulcph))]
2990#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2991pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2992 _mm256_maskz_mul_pch(k, a, b)
2993}
2994
2995/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
2996/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2997///
2998/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
2999#[inline]
3000#[target_feature(enable = "avx512fp16")]
3001#[cfg_attr(test, assert_instr(vfmulcph))]
3002#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3003pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
3004 _mm512_mul_pch(a, b)
3005}
3006
3007/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3008/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3009/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3010///
3011/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
3012#[inline]
3013#[target_feature(enable = "avx512fp16")]
3014#[cfg_attr(test, assert_instr(vfmulcph))]
3015#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3016pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3017 _mm512_mask_mul_pch(src, k, a, b)
3018}
3019
3020/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3021/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3022/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3023///
3024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
3025#[inline]
3026#[target_feature(enable = "avx512fp16")]
3027#[cfg_attr(test, assert_instr(vfmulcph))]
3028#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3029pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3030 _mm512_maskz_mul_pch(k, a, b)
3031}
3032
3033/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3034/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3035/// Rounding is done according to the rounding parameter, which can be one of:
3036///
3037/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3038/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3039/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3040/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3041/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3042///
3043/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
3044#[inline]
3045#[target_feature(enable = "avx512fp16")]
3046#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3047#[rustc_legacy_const_generics(2)]
3048#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3049pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3050 static_assert_rounding!(ROUNDING);
3051 _mm512_mul_round_pch::<ROUNDING>(a, b)
3052}
3053
3054/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3055/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3056/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3057/// Rounding is done according to the rounding parameter, which can be one of:
3058///
3059/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3060/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3061/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3062/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3063/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3064///
3065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
3066#[inline]
3067#[target_feature(enable = "avx512fp16")]
3068#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3069#[rustc_legacy_const_generics(4)]
3070#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3071pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
3072 src: __m512h,
3073 k: __mmask16,
3074 a: __m512h,
3075 b: __m512h,
3076) -> __m512h {
3077 static_assert_rounding!(ROUNDING);
3078 _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b)
3079}
3080
3081/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3082/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3083/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3084/// Rounding is done according to the rounding parameter, which can be one of:
3085///
3086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3091///
3092/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
3093#[inline]
3094#[target_feature(enable = "avx512fp16")]
3095#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
3096#[rustc_legacy_const_generics(3)]
3097#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3098pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
3099 k: __mmask16,
3100 a: __m512h,
3101 b: __m512h,
3102) -> __m512h {
3103 static_assert_rounding!(ROUNDING);
3104 _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b)
3105}
3106
3107/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is
3108/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3109/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3110///
3111/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
3112#[inline]
3113#[target_feature(enable = "avx512fp16")]
3114#[cfg_attr(test, assert_instr(vfmulcsh))]
3115#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3116pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
3117 _mm_mul_sch(a, b)
3118}
3119
3120/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3121/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3122/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3123///
3124/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
3125#[inline]
3126#[target_feature(enable = "avx512fp16")]
3127#[cfg_attr(test, assert_instr(vfmulcsh))]
3128#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3129pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3130 _mm_mask_mul_sch(src, k, a, b)
3131}
3132
3133/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3134/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3135/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3136///
3137/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
3138#[inline]
3139#[target_feature(enable = "avx512fp16")]
3140#[cfg_attr(test, assert_instr(vfmulcsh))]
3141#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3142pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3143 _mm_maskz_mul_sch(k, a, b)
3144}
3145
3146/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed
3147/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3148///
3149/// Rounding is done according to the rounding parameter, which can be one of:
3150///
3151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3156///
3157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
3158#[inline]
3159#[target_feature(enable = "avx512fp16")]
3160#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3161#[rustc_legacy_const_generics(2)]
3162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3163pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3164 static_assert_rounding!(ROUNDING);
3165 _mm_mul_round_sch::<ROUNDING>(a, b)
3166}
3167
3168/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3169/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3170/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3171///
3172/// Rounding is done according to the rounding parameter, which can be one of:
3173///
3174/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3175/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3176/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3177/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3178/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3179///
3180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
3181#[inline]
3182#[target_feature(enable = "avx512fp16")]
3183#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3184#[rustc_legacy_const_generics(4)]
3185#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3186pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
3187 src: __m128h,
3188 k: __mmask8,
3189 a: __m128h,
3190 b: __m128h,
3191) -> __m128h {
3192 static_assert_rounding!(ROUNDING);
3193 _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b)
3194}
3195
3196/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3197/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3198/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3199///
3200/// Rounding is done according to the rounding parameter, which can be one of:
3201///
3202/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3203/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3204/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3205/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3206/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3207///
3208/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
3209#[inline]
3210#[target_feature(enable = "avx512fp16")]
3211#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
3212#[rustc_legacy_const_generics(3)]
3213#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3214pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
3215 k: __mmask8,
3216 a: __m128h,
3217 b: __m128h,
3218) -> __m128h {
3219 static_assert_rounding!(ROUNDING);
3220 _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b)
3221}
3222
3223/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3224/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3225/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3226/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3227///
3228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
3229#[inline]
3230#[target_feature(enable = "avx512fp16,avx512vl")]
3231#[cfg_attr(test, assert_instr(vfcmulcph))]
3232#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
}
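
// Worked example (for illustration only, not in the original source): the second operand is
// conjugated before the multiply.
//
//     // (a_re + i*a_im) * (b_re - i*b_im)
//     //   = (a_re*b_re + a_im*b_im) + i*(a_im*b_re - a_re*b_im)
//     let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0); // 1 + 2i
//     let b = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 3.0); // 3 + 4i
//     // Lanes 0..1 of r hold (11.0, 2.0), i.e. (1 + 2i) * (3 - 4i) = 11 + 2i.
//     let r = _mm_cmul_pch(a, b);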
3236
3237/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3238/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3239/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3240/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3241///
3242/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
3243#[inline]
3244#[target_feature(enable = "avx512fp16,avx512vl")]
3245#[cfg_attr(test, assert_instr(vfcmulcph))]
3246#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
}
3250
3251/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3252/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3253/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3254/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3255///
3256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
3257#[inline]
3258#[target_feature(enable = "avx512fp16,avx512vl")]
3259#[cfg_attr(test, assert_instr(vfcmulcph))]
3260#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
}
3264
3265/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3266/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3267/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3268/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3269///
3270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
3271#[inline]
3272#[target_feature(enable = "avx512fp16,avx512vl")]
3273#[cfg_attr(test, assert_instr(vfcmulcph))]
3274#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
}
3278
3279/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3280/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3281/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3282/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3283///
3284/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
3285#[inline]
3286#[target_feature(enable = "avx512fp16,avx512vl")]
3287#[cfg_attr(test, assert_instr(vfcmulcph))]
3288#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
}
3292
3293/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3294/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3295/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3296/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3297///
3298/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
3299#[inline]
3300#[target_feature(enable = "avx512fp16,avx512vl")]
3301#[cfg_attr(test, assert_instr(vfcmulcph))]
3302#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
}
3306
3307/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3308/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3309/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3310/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3311///
3312/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
3313#[inline]
3314#[target_feature(enable = "avx512fp16")]
3315#[cfg_attr(test, assert_instr(vfcmulcph))]
3316#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
}
3320
3321/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3322/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3323/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3324/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3325///
3326/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
3327#[inline]
3328#[target_feature(enable = "avx512fp16")]
3329#[cfg_attr(test, assert_instr(vfcmulcph))]
3330#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3331pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3332 _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3333}
3334
3335/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3336/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3337/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3338/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3339///
3340/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
3341#[inline]
3342#[target_feature(enable = "avx512fp16")]
3343#[cfg_attr(test, assert_instr(vfcmulcph))]
3344#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
}
3348
3349/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3350/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3351/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3352/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3353///
3354/// Rounding is done according to the rounding parameter, which can be one of:
3355///
3356/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3357/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3358/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3359/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3360/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3361///
3362/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
3363#[inline]
3364#[target_feature(enable = "avx512fp16")]
3365#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3366#[rustc_legacy_const_generics(2)]
3367#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
}
3372
3373/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3374/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3375/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3376/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3377///
3378/// Rounding is done according to the rounding parameter, which can be one of:
3379///
3380/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3381/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3382/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3383/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3384/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3385///
3386/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
3387#[inline]
3388#[target_feature(enable = "avx512fp16")]
3389#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3390#[rustc_legacy_const_generics(4)]
3391#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3392pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
3393 src: __m512h,
3394 k: __mmask16,
3395 a: __m512h,
3396 b: __m512h,
3397) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmulcph_512(
            transmute(a),
            transmute(b),
            transmute(src),
            k,
            ROUNDING,
        ))
    }
3408}
3409
3410/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3411/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3412/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3413/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3414///
3415/// Rounding is done according to the rounding parameter, which can be one of:
3416///
3417/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3418/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3419/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3420/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3421/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3422///
3423/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
3424#[inline]
3425#[target_feature(enable = "avx512fp16")]
3426#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3427#[rustc_legacy_const_generics(3)]
3428#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3429pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
3430 k: __mmask16,
3431 a: __m512h,
3432 b: __m512h,
3433) -> __m512h {
3434 static_assert_rounding!(ROUNDING);
3435 _mm512_mask_cmul_round_pch::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
3436}
3437
3438/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3439/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3440/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3441///
3442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
3443#[inline]
3444#[target_feature(enable = "avx512fp16")]
3445#[cfg_attr(test, assert_instr(vfcmulcsh))]
3446#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3447pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
3448 _mm_mask_cmul_sch(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
3449}
3450
3451/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3452/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3453/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3454/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3455///
3456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
3457#[inline]
3458#[target_feature(enable = "avx512fp16")]
3459#[cfg_attr(test, assert_instr(vfcmulcsh))]
3460#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3461pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3462 _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
3463}
3464
3465/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3466/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3467/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3468/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3469///
3470/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
3471#[inline]
3472#[target_feature(enable = "avx512fp16")]
3473#[cfg_attr(test, assert_instr(vfcmulcsh))]
3474#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3475pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3476 _mm_mask_cmul_sch(src:f16x8::ZERO.as_m128h(), k, a, b)
3477}
3478
3479/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3480/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3481/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3482///
3483/// Rounding is done according to the rounding parameter, which can be one of:
3484///
3485/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3486/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3487/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3488/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3489/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3490///
3491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
3492#[inline]
3493#[target_feature(enable = "avx512fp16")]
3494#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3495#[rustc_legacy_const_generics(2)]
3496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3497pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3498 static_assert_rounding!(ROUNDING);
3499 _mm_mask_cmul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
3500}
3501
3502/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3503/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3504/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3505/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3506///
3507/// Rounding is done according to the rounding parameter, which can be one of:
3508///
3509/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3510/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3511/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3512/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3513/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3514///
3515/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
3516#[inline]
3517#[target_feature(enable = "avx512fp16")]
3518#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3519#[rustc_legacy_const_generics(4)]
3520#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3521pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
3522 src: __m128h,
3523 k: __mmask8,
3524 a: __m128h,
3525 b: __m128h,
3526) -> __m128h {
3527 unsafe {
3528 static_assert_rounding!(ROUNDING);
3529 transmute(src:vfcmulcsh(
3530 a:transmute(a),
3531 b:transmute(b),
3532 src:transmute(src),
3533 k,
3534 ROUNDING,
3535 ))
3536 }
3537}
3538
3539/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3540/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3541/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3542/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3543///
3544/// Rounding is done according to the rounding parameter, which can be one of:
3545///
3546/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3547/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3548/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3549/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3550/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3551///
3552/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
3553#[inline]
3554#[target_feature(enable = "avx512fp16")]
3555#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3556#[rustc_legacy_const_generics(3)]
3557#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3558pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
3559 k: __mmask8,
3560 a: __m128h,
3561 b: __m128h,
3562) -> __m128h {
3563 static_assert_rounding!(ROUNDING);
3564 _mm_mask_cmul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
3565}
3566
3567/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3568/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3569/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3570/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3571///
3572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
3573#[inline]
3574#[target_feature(enable = "avx512fp16,avx512vl")]
3575#[cfg_attr(test, assert_instr(vfcmulcph))]
3576#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3577pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
3578 _mm_cmul_pch(a, b)
3579}
3580
3581/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3582/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3583/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3584/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3585///
3586/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
3587#[inline]
3588#[target_feature(enable = "avx512fp16,avx512vl")]
3589#[cfg_attr(test, assert_instr(vfcmulcph))]
3590#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3591pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3592 _mm_mask_cmul_pch(src, k, a, b)
3593}
3594
3595/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3596/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3597/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3598/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3599///
3600/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
3601#[inline]
3602#[target_feature(enable = "avx512fp16,avx512vl")]
3603#[cfg_attr(test, assert_instr(vfcmulcph))]
3604#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3605pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3606 _mm_maskz_cmul_pch(k, a, b)
3607}
3608
3609/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3610/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3611/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3612/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3613///
3614/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
3615#[inline]
3616#[target_feature(enable = "avx512fp16,avx512vl")]
3617#[cfg_attr(test, assert_instr(vfcmulcph))]
3618#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3619pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
3620 _mm256_cmul_pch(a, b)
3621}
3622
3623/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3624/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3625/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3626/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3627///
3628/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
3629#[inline]
3630#[target_feature(enable = "avx512fp16,avx512vl")]
3631#[cfg_attr(test, assert_instr(vfcmulcph))]
3632#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3633pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3634 _mm256_mask_cmul_pch(src, k, a, b)
3635}
3636
3637/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3638/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3639/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3640/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3641///
3642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
3643#[inline]
3644#[target_feature(enable = "avx512fp16,avx512vl")]
3645#[cfg_attr(test, assert_instr(vfcmulcph))]
3646#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3647pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3648 _mm256_maskz_cmul_pch(k, a, b)
3649}
3650
3651/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3652/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3653/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3654/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3655///
3656/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
3657#[inline]
3658#[target_feature(enable = "avx512fp16")]
3659#[cfg_attr(test, assert_instr(vfcmulcph))]
3660#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3661pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
3662 _mm512_cmul_pch(a, b)
3663}
3664
3665/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3666/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3667/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3668/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3669///
3670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
3671#[inline]
3672#[target_feature(enable = "avx512fp16")]
3673#[cfg_attr(test, assert_instr(vfcmulcph))]
3674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3675pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3676 _mm512_mask_cmul_pch(src, k, a, b)
3677}
3678
3679/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3680/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3681/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3682/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3683///
3684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
3685#[inline]
3686#[target_feature(enable = "avx512fp16")]
3687#[cfg_attr(test, assert_instr(vfcmulcph))]
3688#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3689pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3690 _mm512_maskz_cmul_pch(k, a, b)
3691}
3692
3693/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3694/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3695/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3696///
3697/// Rounding is done according to the rounding parameter, which can be one of:
3698///
3699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3704///
3705/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
3706#[inline]
3707#[target_feature(enable = "avx512fp16")]
3708#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3709#[rustc_legacy_const_generics(2)]
3710#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3711pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3712 static_assert_rounding!(ROUNDING);
3713 _mm512_cmul_round_pch::<ROUNDING>(a, b)
3714}
3715
3716/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3717/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3718/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3719/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3720///
3721/// Rounding is done according to the rounding parameter, which can be one of:
3722///
3723/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3724/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3725/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3726/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3727/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3728///
3729/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
3730#[inline]
3731#[target_feature(enable = "avx512fp16")]
3732#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3733#[rustc_legacy_const_generics(4)]
3734#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3735pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
3736 src: __m512h,
3737 k: __mmask16,
3738 a: __m512h,
3739 b: __m512h,
3740) -> __m512h {
3741 static_assert_rounding!(ROUNDING);
3742 _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b)
3743}
3744
3745/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3746/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3747/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3748/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3749///
3750/// Rounding is done according to the rounding parameter, which can be one of:
3751///
3752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3757///
3758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
3759#[inline]
3760#[target_feature(enable = "avx512fp16")]
3761#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3762#[rustc_legacy_const_generics(3)]
3763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3764pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
3765 k: __mmask16,
3766 a: __m512h,
3767 b: __m512h,
3768) -> __m512h {
3769 static_assert_rounding!(ROUNDING);
3770 _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b)
3771}
3772
3773/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3774/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3775/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3776/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3777///
3778/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
3779#[inline]
3780#[target_feature(enable = "avx512fp16")]
3781#[cfg_attr(test, assert_instr(vfcmulcsh))]
3782#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3783pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
3784 _mm_cmul_sch(a, b)
3785}
3786
3787/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3788/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3789/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3790/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3791///
3792/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
3793#[inline]
3794#[target_feature(enable = "avx512fp16")]
3795#[cfg_attr(test, assert_instr(vfcmulcsh))]
3796#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3797pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3798 _mm_mask_cmul_sch(src, k, a, b)
3799}
3800
3801/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3802/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3803/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3804/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3805///
3806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
3807#[inline]
3808#[target_feature(enable = "avx512fp16")]
3809#[cfg_attr(test, assert_instr(vfcmulcsh))]
3810#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3811pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3812 _mm_maskz_cmul_sch(k, a, b)
3813}
3814
3815/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3816/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3817/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3818///
3819/// Rounding is done according to the rounding parameter, which can be one of:
3820///
3821/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3822/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3823/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3824/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3825/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3826///
3827/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
3828#[inline]
3829#[target_feature(enable = "avx512fp16")]
3830#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3831#[rustc_legacy_const_generics(2)]
3832#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3833pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3834 static_assert_rounding!(ROUNDING);
3835 _mm_cmul_round_sch::<ROUNDING>(a, b)
3836}
3837
3838/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3839/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3840/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3841/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3842///
3843/// Rounding is done according to the rounding parameter, which can be one of:
3844///
3845/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3846/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3847/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3848/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3849/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3850///
3851/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
3852#[inline]
3853#[target_feature(enable = "avx512fp16")]
3854#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3855#[rustc_legacy_const_generics(4)]
3856#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3857pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
3858 src: __m128h,
3859 k: __mmask8,
3860 a: __m128h,
3861 b: __m128h,
3862) -> __m128h {
3863 static_assert_rounding!(ROUNDING);
3864 _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b)
3865}
3866
3867/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3868/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3869/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3870/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3871///
3872/// Rounding is done according to the rounding parameter, which can be one of:
3873///
3874/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3875/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3876/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3877/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3878/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3879///
3880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
3881#[inline]
3882#[target_feature(enable = "avx512fp16")]
3883#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3884#[rustc_legacy_const_generics(3)]
3885#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3886pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
3887 k: __mmask8,
3888 a: __m128h,
3889 b: __m128h,
3890) -> __m128h {
3891 static_assert_rounding!(ROUNDING);
3892 _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
3893}
3894
3895/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3896/// the results in dst.
3897///
3898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
3899#[inline]
3900#[target_feature(enable = "avx512fp16,avx512vl")]
3901#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3902pub fn _mm_abs_ph(v2: __m128h) -> __m128h {
3903 unsafe { transmute(src:_mm_and_si128(a:transmute(v2), b:_mm_set1_epi16(i16::MAX))) }
3904}
3905
3906/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3907/// the result in dst.
3908///
3909/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
3910#[inline]
3911#[target_feature(enable = "avx512fp16,avx512vl")]
3912#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3913pub fn _mm256_abs_ph(v2: __m256h) -> __m256h {
3914 unsafe { transmute(src:_mm256_and_si256(a:transmute(v2), b:_mm256_set1_epi16(i16::MAX))) }
3915}
3916
3917/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3918/// the result in dst.
3919///
3920/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
3921#[inline]
3922#[target_feature(enable = "avx512fp16")]
3923#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3924pub fn _mm512_abs_ph(v2: __m512h) -> __m512h {
3925 unsafe { transmute(src:_mm512_and_si512(a:transmute(v2), b:_mm512_set1_epi16(i16::MAX))) }
3926}
3927
3928/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex
3929/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines
3930/// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
3931/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3932///
3933/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
3934#[inline]
3935#[target_feature(enable = "avx512fp16,avx512vl")]
3936#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3937pub fn _mm_conj_pch(a: __m128h) -> __m128h {
3938 unsafe { transmute(src:_mm_xor_si128(a:transmute(a), b:_mm_set1_epi32(i32::MIN))) }
3939}
3940
3941/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
3942/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
3943/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number
3944/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3945///
3946/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
3947#[inline]
3948#[target_feature(enable = "avx512fp16,avx512vl")]
3949#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3950pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
3951 unsafe {
3952 let r: __m128 = transmute(src:_mm_conj_pch(a));
3953 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
3954 }
3955}
3956
3957/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
3958/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
3959/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3960/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3961///
3962/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
3963#[inline]
3964#[target_feature(enable = "avx512fp16,avx512vl")]
3965#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3966pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
3967 _mm_mask_conj_pch(src:_mm_setzero_ph(), k, a)
3968}
3969
3970/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
3971/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3972/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3973///
3974/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
3975#[inline]
3976#[target_feature(enable = "avx512fp16,avx512vl")]
3977#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3978pub fn _mm256_conj_pch(a: __m256h) -> __m256h {
3979 unsafe { transmute(src:_mm256_xor_si256(a:transmute(a), b:_mm256_set1_epi32(i32::MIN))) }
3980}
3981
3982/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
3983/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
3984/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3985/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3986///
3987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
3988#[inline]
3989#[target_feature(enable = "avx512fp16,avx512vl")]
3990#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3991pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
3992 unsafe {
3993 let r: __m256 = transmute(src:_mm256_conj_pch(a));
3994 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
3995 }
3996}
3997
3998/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
3999/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4000/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4001/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4002///
4003/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
4004#[inline]
4005#[target_feature(enable = "avx512fp16,avx512vl")]
4006#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4007pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
4008 _mm256_mask_conj_pch(src:_mm256_setzero_ph(), k, a)
4009}
4010
4011/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4012/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4013/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4014///
4015/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
4016#[inline]
4017#[target_feature(enable = "avx512fp16")]
4018#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4019pub fn _mm512_conj_pch(a: __m512h) -> __m512h {
4020 unsafe { transmute(src:_mm512_xor_si512(a:transmute(a), b:_mm512_set1_epi32(i32::MIN))) }
4021}
4022
4023/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4024/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4025/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4026/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4027///
4028/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
4029#[inline]
4030#[target_feature(enable = "avx512fp16")]
4031#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4032pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
4033 unsafe {
4034 let r: __m512 = transmute(src:_mm512_conj_pch(a));
4035 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
4036 }
4037}
4038
4039/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4040/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4041/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4042/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4043///
4044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
4045#[inline]
4046#[target_feature(enable = "avx512fp16")]
4047#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4048pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
4049 _mm512_mask_conj_pch(src:_mm512_setzero_ph(), k, a)
4050}
4051
4052/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4053/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4054/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4055///
4056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
4057#[inline]
4058#[target_feature(enable = "avx512fp16,avx512vl")]
4059#[cfg_attr(test, assert_instr(vfmaddcph))]
4060#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4061pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4062 _mm_mask3_fmadd_pch(a, b, c, k:0xff)
4063}
4064
4065/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4066/// and store the results in dst using writemask k (the element is copied from a when the corresponding
4067/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4068/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4069///
4070/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
4071#[inline]
4072#[target_feature(enable = "avx512fp16,avx512vl")]
4073#[cfg_attr(test, assert_instr(vfmaddcph))]
4074#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4075pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4076 unsafe {
4077 let r: __m128 = transmute(src:_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4078 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4079 }
4080}
4081
4082/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4083/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4084/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4085/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4086///
4087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
4088#[inline]
4089#[target_feature(enable = "avx512fp16,avx512vl")]
4090#[cfg_attr(test, assert_instr(vfmaddcph))]
4091#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4092pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4093 unsafe {
4094 transmute(src:vfmaddcph_mask3_128(
4095 a:transmute(a),
4096 b:transmute(b),
4097 c:transmute(src:c),
4098 k,
4099 ))
4100 }
4101}
4102
4103/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4104/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4105/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4106/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4107///
4108/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
4109#[inline]
4110#[target_feature(enable = "avx512fp16,avx512vl")]
4111#[cfg_attr(test, assert_instr(vfmaddcph))]
4112#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4113pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4114 unsafe {
4115 transmute(src:vfmaddcph_maskz_128(
4116 a:transmute(a),
4117 b:transmute(b),
4118 c:transmute(src:c),
4119 k,
4120 ))
4121 }
4122}
4123
4124/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4125/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4126/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4127///
4128/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
4129#[inline]
4130#[target_feature(enable = "avx512fp16,avx512vl")]
4131#[cfg_attr(test, assert_instr(vfmaddcph))]
4132#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4133pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4134 _mm256_mask3_fmadd_pch(a, b, c, k:0xff)
4135}
4136
4137/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4138/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4139/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4140/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4141///
4142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
4143#[inline]
4144#[target_feature(enable = "avx512fp16,avx512vl")]
4145#[cfg_attr(test, assert_instr(vfmaddcph))]
4146#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4147pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4148 unsafe {
4149 let r: __m256 = transmute(src:_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4150 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4151 }
4152}
4153
4154/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4155/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4156/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4157/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4158///
4159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
4160#[inline]
4161#[target_feature(enable = "avx512fp16,avx512vl")]
4162#[cfg_attr(test, assert_instr(vfmaddcph))]
4163#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4164pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4165 unsafe {
4166 transmute(src:vfmaddcph_mask3_256(
4167 a:transmute(a),
4168 b:transmute(b),
4169 c:transmute(src:c),
4170 k,
4171 ))
4172 }
4173}
4174
4175/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4176/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4177/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4178/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4179///
4180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
4181#[inline]
4182#[target_feature(enable = "avx512fp16,avx512vl")]
4183#[cfg_attr(test, assert_instr(vfmaddcph))]
4184#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4185pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4186 unsafe {
4187 transmute(src:vfmaddcph_maskz_256(
4188 a:transmute(a),
4189 b:transmute(b),
4190 c:transmute(src:c),
4191 k,
4192 ))
4193 }
4194}
4195
4196/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4197/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4198/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4199///
4200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
4201#[inline]
4202#[target_feature(enable = "avx512fp16")]
4203#[cfg_attr(test, assert_instr(vfmaddcph))]
4204#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4205pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4206 _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4207}
4208
4209/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4210/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4211/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4212/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4213///
4214/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
4215#[inline]
4216#[target_feature(enable = "avx512fp16")]
4217#[cfg_attr(test, assert_instr(vfmaddcph))]
4218#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4219pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4220 _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4221}
4222
4223/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4224/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4225/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4226/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4227///
4228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
4229#[inline]
4230#[target_feature(enable = "avx512fp16")]
4231#[cfg_attr(test, assert_instr(vfmaddcph))]
4232#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4233pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4234 _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4235}
4236
4237/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4238/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4239/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4240/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4241///
4242/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
4243#[inline]
4244#[target_feature(enable = "avx512fp16")]
4245#[cfg_attr(test, assert_instr(vfmaddcph))]
4246#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4247pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4248 _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4249}
4250
4251/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4252/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4253/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4254///
4255/// Rounding is done according to the rounding parameter, which can be one of:
4256///
4257/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4258/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4259/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4260/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4262///
4263/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
4264#[inline]
4265#[target_feature(enable = "avx512fp16")]
4266#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4267#[rustc_legacy_const_generics(3)]
4268#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4269pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4270 static_assert_rounding!(ROUNDING);
4271 _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k:0xffff)
4272}
4273
4274/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4275/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4276/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4277/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4278///
4279/// Rounding is done according to the rounding parameter, which can be one of:
4280///
4281/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4282/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4283/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4284/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4285/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4286///
4287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
4288#[inline]
4289#[target_feature(enable = "avx512fp16")]
4290#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4291#[rustc_legacy_const_generics(4)]
4292#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4293pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
4294 a: __m512h,
4295 k: __mmask16,
4296 b: __m512h,
4297 c: __m512h,
4298) -> __m512h {
4299 unsafe {
4300 static_assert_rounding!(ROUNDING);
4301 let r: __m512 = transmute(src:_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does
4302 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4303 }
4304}
4305
4306/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4307/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4308/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4309/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4310///
4311/// Rounding is done according to the rounding parameter, which can be one of:
4312///
4313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4318///
4319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
4320#[inline]
4321#[target_feature(enable = "avx512fp16")]
4322#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4323#[rustc_legacy_const_generics(4)]
4324#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4325pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
4326 a: __m512h,
4327 b: __m512h,
4328 c: __m512h,
4329 k: __mmask16,
4330) -> __m512h {
4331 unsafe {
4332 static_assert_rounding!(ROUNDING);
4333 transmute(src:vfmaddcph_mask3_512(
4334 a:transmute(a),
4335 b:transmute(b),
4336 c:transmute(src:c),
4337 k,
4338 ROUNDING,
4339 ))
4340 }
4341}
4342
4343/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4344/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4345/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4346/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4347///
4348/// Rounding is done according to the rounding parameter, which can be one of:
4349///
4350/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4351/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4352/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4353/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4354/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4355///
4356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
4357#[inline]
4358#[target_feature(enable = "avx512fp16")]
4359#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4360#[rustc_legacy_const_generics(4)]
4361#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4362pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
4363 k: __mmask16,
4364 a: __m512h,
4365 b: __m512h,
4366 c: __m512h,
4367) -> __m512h {
4368 unsafe {
4369 static_assert_rounding!(ROUNDING);
4370 transmute(src:vfmaddcph_maskz_512(
4371 a:transmute(a),
4372 b:transmute(b),
4373 c:transmute(src:c),
4374 k,
4375 ROUNDING,
4376 ))
4377 }
4378}
4379
4380/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4381/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
4382/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
4383/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4384///
4385/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
4386#[inline]
4387#[target_feature(enable = "avx512fp16")]
4388#[cfg_attr(test, assert_instr(vfmaddcsh))]
4389#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4390pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4391 _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4392}
4393
4394/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4395/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4396/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4397/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4398/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4399///
4400/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
4401#[inline]
4402#[target_feature(enable = "avx512fp16")]
4403#[cfg_attr(test, assert_instr(vfmaddcsh))]
4404#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4405pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4406 _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4407}
4408
4409/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4410/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4411/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4412/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4413/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4414///
4415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
4416#[inline]
4417#[target_feature(enable = "avx512fp16")]
4418#[cfg_attr(test, assert_instr(vfmaddcsh))]
4419#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4420pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4421 _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4422}
4423
4424/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4425/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4426/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4427/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4428/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4429///
4430/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
4431#[inline]
4432#[target_feature(enable = "avx512fp16")]
4433#[cfg_attr(test, assert_instr(vfmaddcsh))]
4434#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4435pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4436 _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4437}
4438
4439/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4440/// store the result in the lower elements of dst. Each complex number is composed of two adjacent
4441/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4442///
4443/// Rounding is done according to the rounding parameter, which can be one of:
4444///
4445/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4446/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4447/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4448/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4449/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4450///
4451/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
4452#[inline]
4453#[target_feature(enable = "avx512fp16")]
4454#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4455#[rustc_legacy_const_generics(3)]
4456#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4457pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
    }
4468}
4469
4470/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4471/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4472/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4473/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4474/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4475///
4476/// Rounding is done according to the rounding parameter, which can be one of:
4477///
4478/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4479/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4480/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4481/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4482/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4483///
4484/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
4485#[inline]
4486#[target_feature(enable = "avx512fp16")]
4487#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4488#[rustc_legacy_const_generics(4)]
4489#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4490pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
4491 a: __m128h,
4492 k: __mmask8,
4493 b: __m128h,
4494 c: __m128h,
4495) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what Clang does
        // Blend the low 32 bits (one complex f16 pair) according to bit 0 of `k`; the upper elements come from `a`.
        transmute(_mm_mask_move_ss(a, k, a, r))
    }
4502}
4503
4504/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4505/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4506/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4507/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4508/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4509///
4510/// Rounding is done according to the rounding parameter, which can be one of:
4511///
4512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4517///
4518/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
4519#[inline]
4520#[target_feature(enable = "avx512fp16")]
4521#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4522#[rustc_legacy_const_generics(4)]
4523#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4524pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
4525 a: __m128h,
4526 b: __m128h,
4527 c: __m128h,
4528 k: __mmask8,
4529) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let c: __m128 = transmute(c);
        let r: __m128 = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
    }
4536}
4537
4538/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4539/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4540/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4541/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4542/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4543///
4544/// Rounding is done according to the rounding parameter, which can be one of:
4545///
4546/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4547/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4548/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4549/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4550/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4551///
4552/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
4553#[inline]
4554#[target_feature(enable = "avx512fp16")]
4555#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4556#[rustc_legacy_const_generics(4)]
4557#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4558pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
4559 k: __mmask8,
4560 a: __m128h,
4561 b: __m128h,
4562 c: __m128h,
4563) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfmaddcsh_maskz(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
    }
4574}
4575
4576/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4577/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4578/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4579/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4580///
4581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
4582#[inline]
4583#[target_feature(enable = "avx512fp16,avx512vl")]
4584#[cfg_attr(test, assert_instr(vfcmaddcph))]
4585#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4586pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_mask3_fcmadd_pch(a, b, c, 0xff)
4588}
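
// Illustrative note (not part of the module): the vfcmaddcph family conjugates `b` before the
// complex multiply, i.e. it computes `a * conj(b) + c` per complex lane. A minimal scalar sketch
// with f32 pairs for readability:
//
//     fn complex_conj_fmadd(a: [f32; 2], b: [f32; 2], c: [f32; 2]) -> [f32; 2] {
//         // (a.re + i*a.im) * (b.re - i*b.im) + (c.re + i*c.im)
//         [
//             a[0] * b[0] + a[1] * b[1] + c[0], // real part
//             a[1] * b[0] - a[0] * b[1] + c[1], // imaginary part
//         ]
//     }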
4589
4590/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4591/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4592/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4593/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4594/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4595///
4596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
4597#[inline]
4598#[target_feature(enable = "avx512fp16,avx512vl")]
4599#[cfg_attr(test, assert_instr(vfcmaddcph))]
4600#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4601pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
    }
4606}
4607
4608/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4609/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4610/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4611/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4612/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4613///
4614/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
4615#[inline]
4616#[target_feature(enable = "avx512fp16,avx512vl")]
4617#[cfg_attr(test, assert_instr(vfcmaddcph))]
4618#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4619pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe {
        transmute(vfcmaddcph_mask3_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
4628}
4629
4630/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4631/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4632/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4633/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4634/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4635///
4636/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
4637#[inline]
4638#[target_feature(enable = "avx512fp16,avx512vl")]
4639#[cfg_attr(test, assert_instr(vfcmaddcph))]
4640#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4641pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        transmute(vfcmaddcph_maskz_128(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
4650}
4651
4652/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4653/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4654/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4655/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4656///
4657/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
4658#[inline]
4659#[target_feature(enable = "avx512fp16,avx512vl")]
4660#[cfg_attr(test, assert_instr(vfcmaddcph))]
4661#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4662pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
4664}
4665
4666/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4667/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4668/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4669/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4670/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4671///
4672/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
4673#[inline]
4674#[target_feature(enable = "avx512fp16,avx512vl")]
4675#[cfg_attr(test, assert_instr(vfcmaddcph))]
4676#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4677pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
    unsafe {
        let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
    }
4682}
4683
4684/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4685/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4686/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4687/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4688/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4689///
4690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
4691#[inline]
4692#[target_feature(enable = "avx512fp16,avx512vl")]
4693#[cfg_attr(test, assert_instr(vfcmaddcph))]
4694#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4695pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
    unsafe {
        transmute(vfcmaddcph_mask3_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
4704}
4705
4706/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4707/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4708/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4709/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4710/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4711///
4712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
4713#[inline]
4714#[target_feature(enable = "avx512fp16,avx512vl")]
4715#[cfg_attr(test, assert_instr(vfcmaddcph))]
4716#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4717pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe {
        transmute(vfcmaddcph_maskz_256(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
        ))
    }
4726}
4727
4728/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4729/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4730/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4731/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4732///
4733/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
4734#[inline]
4735#[target_feature(enable = "avx512fp16")]
4736#[cfg_attr(test, assert_instr(vfcmaddcph))]
4737#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4738pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4739 _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4740}
4741
4742/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4743/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4744/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4745/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4746/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4747///
4748/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
4749#[inline]
4750#[target_feature(enable = "avx512fp16")]
4751#[cfg_attr(test, assert_instr(vfcmaddcph))]
4752#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4753pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4754 _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4755}
4756
4757/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4758/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4759/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4760/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4761/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4762///
4763/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
4764#[inline]
4765#[target_feature(enable = "avx512fp16")]
4766#[cfg_attr(test, assert_instr(vfcmaddcph))]
4767#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4768pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4769 _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4770}
4771
4772/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4773/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4774/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4775/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4776/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4777///
4778/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
4779#[inline]
4780#[target_feature(enable = "avx512fp16")]
4781#[cfg_attr(test, assert_instr(vfcmaddcph))]
4782#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4783pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4784 _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4785}
4786
4787/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4788/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4789/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4790/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4791///
4792/// Rounding is done according to the rounding parameter, which can be one of:
4793///
4794/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4795/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4796/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4797/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4798/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4799///
4800/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
4801#[inline]
4802#[target_feature(enable = "avx512fp16")]
4803#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4804#[rustc_legacy_const_generics(3)]
4805#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4806pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
4809}
4810
4811/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4812/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4813/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4814/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4815/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4816///
4817/// Rounding is done according to the rounding parameter, which can be one of:
4818///
4819/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4820/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4821/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4822/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4823/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4824///
4825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
4826#[inline]
4827#[target_feature(enable = "avx512fp16")]
4828#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4829#[rustc_legacy_const_generics(4)]
4830#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4831pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
4832 a: __m512h,
4833 k: __mmask16,
4834 b: __m512h,
4835 c: __m512h,
4836) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what Clang does
        transmute(simd_select_bitmask(k, r, transmute(a)))
    }
4842}
4843
4844/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4845/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
4846/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4848/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4849///
4850/// Rounding is done according to the rounding parameter, which can be one of:
4851///
4852/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4853/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4854/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4855/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4856/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4857///
4858/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
4859#[inline]
4860#[target_feature(enable = "avx512fp16")]
4861#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4862#[rustc_legacy_const_generics(4)]
4863#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4864pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
4865 a: __m512h,
4866 b: __m512h,
4867 c: __m512h,
4868 k: __mmask16,
4869) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcph_mask3_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
    }
4880}
4881
4882/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4883/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
4884/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
4886/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4887///
4888/// Rounding is done according to the rounding parameter, which can be one of:
4889///
4890/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4891/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4892/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4893/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4895///
4896/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
4897#[inline]
4898#[target_feature(enable = "avx512fp16")]
4899#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4900#[rustc_legacy_const_generics(4)]
4901#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4902pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
4903 k: __mmask16,
4904 a: __m512h,
4905 b: __m512h,
4906 c: __m512h,
4907) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcph_maskz_512(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
    }
4918}
4919
4920/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4921/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
4922/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
4923/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4924/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4925///
4926/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
4927#[inline]
4928#[target_feature(enable = "avx512fp16")]
4929#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4930#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4931pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4932 _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4933}
4934
4935/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4936/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
4937/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
4938/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
4939/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4940/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4941///
4942/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
4943#[inline]
4944#[target_feature(enable = "avx512fp16")]
4945#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4946#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4947pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4948 _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4949}
4950
4951/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4952/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
4953/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
4954/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
4955/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4956/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4957///
4958/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
4959#[inline]
4960#[target_feature(enable = "avx512fp16")]
4961#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4962#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4963pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4964 _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4965}
4966
4967/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4968/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
4969/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper
4970/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
4971/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4972/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4973///
4974/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
4975#[inline]
4976#[target_feature(enable = "avx512fp16")]
4977#[cfg_attr(test, assert_instr(vfcmaddcsh))]
4978#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4979pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4980 _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4981}
4982
4983/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
4984/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
4985/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
4986/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4987/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4988///
4989/// Rounding is done according to the rounding parameter, which can be one of:
4990///
4991/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4992/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4993/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4994/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4995/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4996///
4997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
4998#[inline]
4999#[target_feature(enable = "avx512fp16")]
5000#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5001#[rustc_legacy_const_generics(3)]
5002#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5003pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcsh_mask(
            transmute(a),
            transmute(b),
            transmute(c),
            0xff,
            ROUNDING,
        ))
    }
5014}
5015
5016/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5017/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5018/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
5019/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5020/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5021/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5022///
5023/// Rounding is done according to the rounding parameter, which can be one of:
5024///
5025/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5026/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5027/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5028/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5029/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5030///
5031/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
5032#[inline]
5033#[target_feature(enable = "avx512fp16")]
5034#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5035#[rustc_legacy_const_generics(4)]
5036#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5037pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
5038 a: __m128h,
5039 k: __mmask8,
5040 b: __m128h,
5041 c: __m128h,
5042) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: __m128 = transmute(a);
        let r: __m128 = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING);
        transmute(_mm_mask_move_ss(a, k, a, r))
    }
5049}
5050
5051/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5052/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5053/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
5054/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5055/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5056/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5057///
5058/// Rounding is done according to the rounding parameter, which can be one of:
5059///
5060/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5061/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5062/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5063/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5064/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5065///
5066/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
5067#[inline]
5068#[target_feature(enable = "avx512fp16")]
5069#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5070#[rustc_legacy_const_generics(4)]
5071#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5072pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
5073 a: __m128h,
5074 b: __m128h,
5075 c: __m128h,
5076 k: __mmask8,
5077) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let c: __m128 = transmute(c);
        let r: __m128 = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING);
        transmute(_mm_move_ss(c, r))
    }
5084}
5085
5086/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5087/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
5088/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
5089/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex
5091/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5092///
5093/// Rounding is done according to the rounding parameter, which can be one of:
5094///
5095/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5096/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5097/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5098/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5099/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5100///
5101/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
5102#[inline]
5103#[target_feature(enable = "avx512fp16")]
5104#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5105#[rustc_legacy_const_generics(4)]
5106#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5107pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
5108 k: __mmask8,
5109 a: __m128h,
5110 b: __m128h,
5111 c: __m128h,
5112) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vfcmaddcsh_maskz(
            transmute(a),
            transmute(b),
            transmute(c),
            k,
            ROUNDING,
        ))
    }
5123}
5124
5125/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5126/// result to packed elements in c, and store the results in dst.
5127///
5128/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
5129#[inline]
5130#[target_feature(enable = "avx512fp16,avx512vl")]
5131#[cfg_attr(test, assert_instr(vfmadd))]
5132#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5133pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(a, b, c) }
5135}
5136
5137/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5138/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5139/// from a when the corresponding mask bit is not set).
5140///
5141/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
5142#[inline]
5143#[target_feature(enable = "avx512fp16,avx512vl")]
5144#[cfg_attr(test, assert_instr(vfmadd))]
5145#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5146pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) }
5148}
5149
5150/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5151/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5152/// from c when the corresponding mask bit is not set).
5153///
5154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
5155#[inline]
5156#[target_feature(enable = "avx512fp16,avx512vl")]
5157#[cfg_attr(test, assert_instr(vfmadd))]
5158#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5159pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) }
5161}
5162
5163/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5164/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5165/// out when the corresponding mask bit is not set).
5166///
5167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
5168#[inline]
5169#[target_feature(enable = "avx512fp16,avx512vl")]
5170#[cfg_attr(test, assert_instr(vfmadd))]
5171#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5172pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) }
5174}
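
// Illustrative note (not part of the module): the mask/maskz variants above all follow the same
// per-lane selection model. A scalar sketch of that selection, assuming 8 lanes for readability
// (`computed` is the unmasked fmadd result; `fallback` is `a`, `c`, or zero depending on the variant):
//
//     fn mask_select(k: u8, computed: [f32; 8], fallback: [f32; 8]) -> [f32; 8] {
//         let mut dst = [0.0f32; 8];
//         for i in 0..8 {
//             dst[i] = if (k >> i) & 1 != 0 { computed[i] } else { fallback[i] };
//         }
//         dst
//     }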
5175
5176/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5177/// result to packed elements in c, and store the results in dst.
5178///
5179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
5180#[inline]
5181#[target_feature(enable = "avx512fp16,avx512vl")]
5182#[cfg_attr(test, assert_instr(vfmadd))]
5183#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5184pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(a, b, c) }
5186}
5187
5188/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5189/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5190/// from a when the corresponding mask bit is not set).
5191///
5192/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
5193#[inline]
5194#[target_feature(enable = "avx512fp16,avx512vl")]
5195#[cfg_attr(test, assert_instr(vfmadd))]
5196#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5197pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) }
5199}
5200
5201/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5202/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5203/// from c when the corresponding mask bit is not set).
5204///
5205/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
5206#[inline]
5207#[target_feature(enable = "avx512fp16,avx512vl")]
5208#[cfg_attr(test, assert_instr(vfmadd))]
5209#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5210pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) }
5212}
5213
5214/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5215/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5216/// out when the corresponding mask bit is not set).
5217///
5218/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
5219#[inline]
5220#[target_feature(enable = "avx512fp16,avx512vl")]
5221#[cfg_attr(test, assert_instr(vfmadd))]
5222#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5223pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) }
5225}
5226
5227/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5228/// result to packed elements in c, and store the results in dst.
5229///
5230/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
5231#[inline]
5232#[target_feature(enable = "avx512fp16")]
5233#[cfg_attr(test, assert_instr(vfmadd))]
5234#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5235pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(a, b, c) }
5237}
5238
5239/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5240/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5241/// from a when the corresponding mask bit is not set).
5242///
5243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
5244#[inline]
5245#[target_feature(enable = "avx512fp16")]
5246#[cfg_attr(test, assert_instr(vfmadd))]
5247#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5248pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) }
5250}
5251
5252/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5253/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5254/// from c when the corresponding mask bit is not set).
5255///
5256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
5257#[inline]
5258#[target_feature(enable = "avx512fp16")]
5259#[cfg_attr(test, assert_instr(vfmadd))]
5260#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5261pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) }
5263}
5264
5265/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5266/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5267/// out when the corresponding mask bit is not set).
5268///
5269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
5270#[inline]
5271#[target_feature(enable = "avx512fp16")]
5272#[cfg_attr(test, assert_instr(vfmadd))]
5273#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5274pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) }
5276}
5277
5278/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5279/// result to packed elements in c, and store the results in dst.
5280///
5281/// Rounding is done according to the rounding parameter, which can be one of:
5282///
5283/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5284/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5285/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5286/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5288///
5289/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
5290#[inline]
5291#[target_feature(enable = "avx512fp16")]
5292#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5293#[rustc_legacy_const_generics(3)]
5294#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5295pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5296 unsafe {
5297 static_assert_rounding!(ROUNDING);
5298 vfmaddph_512(a, b, c, ROUNDING)
5299 }
5300}
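
// Illustrative usage sketch (hypothetical values; requires the nightly `stdarch_x86_avx512_f16`
// feature and a CPU with AVX512-FP16). The rounding constants are combined with `|` exactly as
// listed in the doc comments above:
//
//     let r = unsafe {
//         _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
//     };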
5301
5302/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5303/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5304/// from a when the corresponding mask bit is not set).
5305///
5306/// Rounding is done according to the rounding parameter, which can be one of:
5307///
5308/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5309/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5310/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5311/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5312/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5313///
5314/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
5315#[inline]
5316#[target_feature(enable = "avx512fp16")]
5317#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5318#[rustc_legacy_const_generics(4)]
5319#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5320pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
5321 a: __m512h,
5322 k: __mmask32,
5323 b: __m512h,
5324 c: __m512h,
5325) -> __m512h {
5326 unsafe {
5327 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), a)
5329 }
5330}
5331
5332/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5333/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5334/// from c when the corresponding mask bit is not set).
5335///
5336/// Rounding is done according to the rounding parameter, which can be one of:
5337///
5338/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5339/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5340/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5341/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5342/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5343///
5344/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
5345#[inline]
5346#[target_feature(enable = "avx512fp16")]
5347#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5348#[rustc_legacy_const_generics(4)]
5349#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5350pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
5351 a: __m512h,
5352 b: __m512h,
5353 c: __m512h,
5354 k: __mmask32,
5355) -> __m512h {
5356 unsafe {
5357 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmadd_round_ph::<ROUNDING>(a, b, c), c)
5359 }
5360}
5361
5362/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5363/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5364/// out when the corresponding mask bit is not set).
5365///
5366/// Rounding is done according to the rounding parameter, which can be one of:
5367///
5368/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5369/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5370/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5371/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5372/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5373///
5374/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
5375#[inline]
5376#[target_feature(enable = "avx512fp16")]
5377#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5378#[rustc_legacy_const_generics(4)]
5379#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5380pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
5381 k: __mmask32,
5382 a: __m512h,
5383 b: __m512h,
5384 c: __m512h,
5385) -> __m512h {
5386 unsafe {
5387 static_assert_rounding!(ROUNDING);
5388 simd_select_bitmask(
5389 m:k,
5390 yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
5391 no:_mm512_setzero_ph(),
5392 )
5393 }
5394}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(extracta, extractb, extractc);
        simd_insert!(a, 0, r)
    }
}
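
// Illustrative sketch only (hypothetical helper): the scalar `_sh` forms operate on lane 0 and
// pass the remaining seven lanes of `a` through unchanged.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_fmadd_sh() -> __m128h {
    let a = _mm_set_sh(2.0);
    let b = _mm_set_sh(3.0);
    let c = _mm_set_sh(4.0);
    // Lane 0 becomes 2.0 * 3.0 + 4.0 = 10.0; lanes 1..=7 are copied from `a` (all zero here).
    _mm_fmadd_sh(a, b, c)
}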

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fmadd: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmadd = fmaf16(fmadd, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe {
        let mut fmadd: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fmadd = fmaf16(extracta, extractb, fmadd);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fmadd: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmadd = fmaf16(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}
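
// Illustrative sketch only (hypothetical helper): contrast of the write-masked and zero-masked
// scalar forms. With mask bit 0 clear, `_mm_mask_fmadd_sh` keeps lane 0 of `a`, while
// `_mm_maskz_fmadd_sh` forces lane 0 to zero; both copy lanes 1..=7 from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_scalar_fmadd_masking(a: __m128h, b: __m128h, c: __m128h) -> (__m128h, __m128h) {
    let merged = _mm_mask_fmadd_sh(a, 0, b, c); // lane 0 = lane 0 of `a`
    let zeroed = _mm_maskz_fmadd_sh(0, a, b, c); // lane 0 = 0.0
    (merged, zeroed)
}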

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, r)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
    a: __m128h,
    k: __mmask8,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
    a: __m128h,
    b: __m128h,
    c: __m128h,
    k: __mmask8,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(a, b, simd_neg(c)) }
}
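
// Illustrative sketch only (hypothetical helper): `_mm_fmsub_ph` computes the fused `a * b - c`
// per lane, which this module expresses as an FMA with `c` negated.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn example_fmsub_ph() -> __m128h {
    let a = _mm_set1_ph(2.0);
    let b = _mm_set1_ph(3.0);
    let c = _mm_set1_ph(1.0);
    // Every lane becomes 2.0 * 3.0 - 1.0 = 5.0.
    _mm_fmsub_ph(a, b, c)
}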

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(a, b, simd_neg(c)) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(a, b, simd_neg(c)) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddph_512(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    k: __mmask32,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), a)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
    k: __mmask32,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsub_round_ph::<ROUNDING>(a, b, c), c)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
    k: __mmask32,
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(extracta, extractb, -extractc);
        simd_insert!(a, 0, r)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a
/// to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fmsub: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmsub = fmaf16(fmsub, extractb, -extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c
/// to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe {
        let mut fmsub: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fmsub = fmaf16(extracta, extractb, -fmsub);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fmsub: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmsub = fmaf16(extracta, extractb, -extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
        simd_insert!(a, 0, r)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a
/// to the upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
    a: __m128h,
    k: __mmask8,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using writemask k
/// (the element is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c
/// to the upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
    a: __m128h,
    b: __m128h,
    c: __m128h,
    k: __mmask8,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the lower
/// element in c from the intermediate result. Store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(simd_neg(a), b, c) }
}
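
// Illustrative sketch only (hypothetical helper): `fnmadd` negates the product, computing
// `c - a * b` per lane (expressed here as an FMA with `a` negated).
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn example_fnmadd_ph() -> __m128h {
    let a = _mm_set1_ph(2.0);
    let b = _mm_set1_ph(3.0);
    let c = _mm_set1_ph(10.0);
    // Every lane becomes 10.0 - 2.0 * 3.0 = 4.0.
    _mm_fnmadd_ph(a, b, c)
}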

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddph_512(simd_neg(a), b, c, ROUNDING)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from a when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
    a: __m512h,
    k: __mmask32,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), a)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
/// from c when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
    k: __mmask32,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), c)
    }
}

/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
/// out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
    k: __mmask32,
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
/// elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(-extracta, extractb, extractc);
        simd_insert!(a, 0, r)
    }
}
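
// Illustrative sketch only (hypothetical helper): scalar counterpart of `_mm_fnmadd_ph`; lane 0
// becomes `c[0] - a[0] * b[0]` and lanes 1..=7 are copied from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_fnmadd_sh() -> __m128h {
    let a = _mm_set_sh(2.0);
    let b = _mm_set_sh(3.0);
    let c = _mm_set_sh(10.0);
    // Lane 0 becomes 10.0 - 2.0 * 3.0 = 4.0; lanes 1..=7 are copied from `a` (zero here).
    _mm_fnmadd_sh(a, b, c)
}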

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
/// elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fnmadd: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmadd = fmaf16(-fnmadd, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
/// elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe {
        let mut fnmadd: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fnmadd = fmaf16(-extracta, extractb, fnmadd);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
/// elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fnmadd: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmadd = fmaf16(-extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}
6470
6471/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6472/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6473/// elements from a to the upper elements of dst.
6474///
6475/// Rounding is done according to the rounding parameter, which can be one of:
6476///
6477/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6478/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6479/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6480/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6481/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6482///
6483/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
6484#[inline]
6485#[target_feature(enable = "avx512fp16")]
6486#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6487#[rustc_legacy_const_generics(3)]
6488#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, r)
    }
}
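// Hedged usage sketch for the rounding variant above: the rounding mode is fixed at compile
// time through the const generic. The wrapper name below is hypothetical; it only shows how a
// `_MM_FROUND_*` combination is supplied.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fnmadd_sh_round_to_zero(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
}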
6499
6500/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6501/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6502/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6503/// elements of dst.
6504///
6505/// Rounding is done according to the rounding parameter, which can be one of:
6506///
6507/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6508/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6509/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6510/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6512///
6513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
6514#[inline]
6515#[target_feature(enable = "avx512fp16")]
6516#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6517#[rustc_legacy_const_generics(4)]
6518#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
    a: __m128h,
    k: __mmask8,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}
6536
6537/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6538/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6539/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6540/// elements of dst.
6541///
6542/// Rounding is done according to the rounding parameter, which can be one of:
6543///
6544/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6545/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6546/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6547/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6548/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6549///
6550/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
6551#[inline]
6552#[target_feature(enable = "avx512fp16")]
6553#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6554#[rustc_legacy_const_generics(4)]
6555#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
    a: __m128h,
    b: __m128h,
    c: __m128h,
    k: __mmask8,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING);
        }
        simd_insert!(c, 0, fnmadd)
    }
}
6573
6574/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6575/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6576/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6577/// elements of dst.
6578///
6579/// Rounding is done according to the rounding parameter, which can be one of:
6580///
6581/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6582/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6583/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6584/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6585/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6586///
6587/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
6588#[inline]
6589#[target_feature(enable = "avx512fp16")]
6590#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6591#[rustc_legacy_const_generics(4)]
6592#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}
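// The three masked scalar forms above differ only in the fallback used for the low lane when
// mask bit 0 is clear: the `mask` form keeps `a`, the `mask3` form keeps `c`, and the `maskz`
// form produces 0.0. A minimal model of that selection step (illustrative only):
#[allow(dead_code)]
fn masked_low_lane(k: u8, computed: f16, fallback: f16) -> f16 {
    if k & 1 != 0 { computed } else { fallback }
}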
6611
6612/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6613/// in c from the negated intermediate result, and store the results in dst.
6614///
6615/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
6616#[inline]
6617#[target_feature(enable = "avx512fp16,avx512vl")]
6618#[cfg_attr(test, assert_instr(vfnmsub))]
6619#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
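// Reference model for the packed FNMSUB above (illustrative, unfused): every lane computes
// -(a * b) - c, matching the `simd_fma(simd_neg(a), b, simd_neg(c))` lowering.
#[allow(dead_code)]
fn fnmsub_ph_model(a: [f16; 8], b: [f16; 8], c: [f16; 8]) -> [f16; 8] {
    let mut r = [0.0; 8];
    for i in 0..8 {
        r[i] = -(a[i] * b[i]) - c[i];
    }
    r
}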
6623
6624/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6625/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6626/// copied from a when the corresponding mask bit is not set).
6627///
6628/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
6629#[inline]
6630#[target_feature(enable = "avx512fp16,avx512vl")]
6631#[cfg_attr(test, assert_instr(vfnmsub))]
6632#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
}
6636
6637/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6638/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6639/// copied from c when the corresponding mask bit is not set).
6640///
6641/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
6642#[inline]
6643#[target_feature(enable = "avx512fp16,avx512vl")]
6644#[cfg_attr(test, assert_instr(vfnmsub))]
6645#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
}
6649
6650/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6651/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6652/// zeroed out when the corresponding mask bit is not set).
6653///
6654/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
6655#[inline]
6656#[target_feature(enable = "avx512fp16,avx512vl")]
6657#[cfg_attr(test, assert_instr(vfnmsub))]
6658#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
}
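// Sketch of the lane-selection step shared by the masked packed variants above (a plain-Rust
// stand-in for `simd_select_bitmask`): bit i of `k` picks lane i of `yes`, otherwise lane i of
// `no`, where `no` is `a`, `c`, or all zeros depending on the variant.
#[allow(dead_code)]
fn select_lanes_ph(k: u8, yes: [f16; 8], no: [f16; 8]) -> [f16; 8] {
    let mut r = no;
    for i in 0..8 {
        if (k >> i) & 1 != 0 {
            r[i] = yes[i];
        }
    }
    r
}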
6662
6663/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6664/// in c from the negated intermediate result, and store the results in dst.
6665///
6666/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
6667#[inline]
6668#[target_feature(enable = "avx512fp16,avx512vl")]
6669#[cfg_attr(test, assert_instr(vfnmsub))]
6670#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
6674
6675/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6676/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6677/// copied from a when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
6680#[inline]
6681#[target_feature(enable = "avx512fp16,avx512vl")]
6682#[cfg_attr(test, assert_instr(vfnmsub))]
6683#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
}
6687
6688/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6689/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6690/// copied from c when the corresponding mask bit is not set).
6691///
6692/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
6693#[inline]
6694#[target_feature(enable = "avx512fp16,avx512vl")]
6695#[cfg_attr(test, assert_instr(vfnmsub))]
6696#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
}
6700
6701/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6702/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6703/// zeroed out when the corresponding mask bit is not set).
6704///
6705/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
6706#[inline]
6707#[target_feature(enable = "avx512fp16,avx512vl")]
6708#[cfg_attr(test, assert_instr(vfnmsub))]
6709#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
}
6713
6714/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6715/// in c from the negated intermediate result, and store the results in dst.
6716///
6717/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
6718#[inline]
6719#[target_feature(enable = "avx512fp16")]
6720#[cfg_attr(test, assert_instr(vfnmsub))]
6721#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}
6725
6726/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6727/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6728/// copied from a when the corresponding mask bit is not set).
6729///
6730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
6731#[inline]
6732#[target_feature(enable = "avx512fp16")]
6733#[cfg_attr(test, assert_instr(vfnmsub))]
6734#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
}
6738
6739/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6740/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6741/// copied from c when the corresponding mask bit is not set).
6742///
6743/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
6744#[inline]
6745#[target_feature(enable = "avx512fp16")]
6746#[cfg_attr(test, assert_instr(vfnmsub))]
6747#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
}
6751
6752/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6753/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6754/// zeroed out when the corresponding mask bit is not set).
6755///
6756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
6757#[inline]
6758#[target_feature(enable = "avx512fp16")]
6759#[cfg_attr(test, assert_instr(vfnmsub))]
6760#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
}
6764
6765/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6766/// in c from the negated intermediate result, and store the results in dst.
6767///
6768/// Rounding is done according to the rounding parameter, which can be one of:
6769///
6770/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6771/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6772/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6773/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6774/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6775///
6776/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
6777#[inline]
6778#[target_feature(enable = "avx512fp16")]
6779#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6780#[rustc_legacy_const_generics(3)]
6781#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING)
    }
}
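// Hedged usage sketch: fixing the rounding mode of the 512-bit FNMSUB at compile time. The
// helper name is hypothetical and only demonstrates how the const generic is supplied.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn fnmsub_ph_round_nearest(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}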
6788
6789/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6790/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6791/// copied from a when the corresponding mask bit is not set).
6792///
6793/// Rounding is done according to the rounding parameter, which can be one of:
6794///
6795/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6796/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6797/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6798/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6799/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6800///
6801/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
6802#[inline]
6803#[target_feature(enable = "avx512fp16")]
6804#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6805#[rustc_legacy_const_generics(4)]
6806#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    k: __mmask32,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), a)
    }
}
6818
6819/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6820/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6821/// copied from c when the corresponding mask bit is not set).
6822///
6823/// Rounding is done according to the rounding parameter, which can be one of:
6824///
6825/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6826/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6827/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6828/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6829/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6830///
6831/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
6832#[inline]
6833#[target_feature(enable = "avx512fp16")]
6834#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6835#[rustc_legacy_const_generics(4)]
6836#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
    k: __mmask32,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), c)
    }
}
6848
6849/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6850/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6851/// zeroed out when the corresponding mask bit is not set).
6852///
6853/// Rounding is done according to the rounding parameter, which can be one of:
6854///
6855/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6856/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6857/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6858/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6859/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6860///
6861/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
6862#[inline]
6863#[target_feature(enable = "avx512fp16")]
6864#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6865#[rustc_legacy_const_generics(4)]
6866#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
    k: __mmask32,
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
    }
}
6882
6883/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6884/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6885/// elements from a to the upper elements of dst.
6886///
6887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
6888#[inline]
6889#[target_feature(enable = "avx512fp16")]
6890#[cfg_attr(test, assert_instr(vfnmsub))]
6891#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = fmaf16(-extracta, extractb, -extractc);
        simd_insert!(a, 0, r)
    }
}
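// Scalar model of the lower-lane FNMSUB above (illustrative, unfused): the low lane becomes
// -(a * b) - c while the upper lanes are taken from `a`.
#[allow(dead_code)]
fn fnmsub_sh_model(a: f16, b: f16, c: f16) -> f16 {
    -(a * b) - c
}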
6901
6902/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6903/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6904/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6905/// elements of dst.
6906///
6907/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
6908#[inline]
6909#[target_feature(enable = "avx512fp16")]
6910#[cfg_attr(test, assert_instr(vfnmsub))]
6911#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fnmsub: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmsub = fmaf16(-fnmsub, extractb, -extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
6923
6924/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6925/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6926/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6927/// elements of dst.
6928///
6929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
6930#[inline]
6931#[target_feature(enable = "avx512fp16")]
6932#[cfg_attr(test, assert_instr(vfnmsub))]
6933#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe {
        let mut fnmsub: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fnmsub = fmaf16(-extracta, extractb, -fnmsub);
        }
        simd_insert!(c, 0, fnmsub)
    }
}
6945
6946/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6947/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6948/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6949/// elements of dst.
6950///
6951/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
6952#[inline]
6953#[target_feature(enable = "avx512fp16")]
6954#[cfg_attr(test, assert_instr(vfnmsub))]
6955#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        let mut fnmsub: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmsub = fmaf16(-extracta, extractb, -extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
6968
6969/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6970/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6971/// elements from a to the upper elements of dst.
6972///
6973/// Rounding is done according to the rounding parameter, which can be one of:
6974///
6975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6980///
6981/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
6982#[inline]
6983#[target_feature(enable = "avx512fp16")]
6984#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6985#[rustc_legacy_const_generics(3)]
6986#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f16 = simd_extract!(a, 0);
        let extractb: f16 = simd_extract!(b, 0);
        let extractc: f16 = simd_extract!(c, 0);
        let r: f16 = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
        simd_insert!(a, 0, r)
    }
}
6997
6998/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6999/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
7000/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7001/// elements of dst.
7002///
7003/// Rounding is done according to the rounding parameter, which can be one of:
7004///
7005/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7006/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7007/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7008/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7009/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7010///
7011/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
7012#[inline]
7013#[target_feature(enable = "avx512fp16")]
7014#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7015#[rustc_legacy_const_generics(4)]
7016#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
    a: __m128h,
    k: __mmask8,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f16 = simd_extract!(a, 0);
        if k & 1 != 0 {
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
7034
7035/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7036/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
7037/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
7038/// elements of dst.
7039///
7040/// Rounding is done according to the rounding parameter, which can be one of:
7041///
7042/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7043/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7044/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7045/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7046/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7047///
7048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
7049#[inline]
7050#[target_feature(enable = "avx512fp16")]
7051#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7052#[rustc_legacy_const_generics(4)]
7053#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
    a: __m128h,
    b: __m128h,
    c: __m128h,
    k: __mmask8,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f16 = simd_extract!(c, 0);
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING);
        }
        simd_insert!(c, 0, fnmsub)
    }
}
7071
7072/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7073/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
7074/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7075/// elements of dst.
7076///
7077/// Rounding is done according to the rounding parameter, which can be one of:
7078///
7079/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7080/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7081/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7082/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7083/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7084///
7085/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
7086#[inline]
7087#[target_feature(enable = "avx512fp16")]
7088#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7089#[rustc_legacy_const_generics(4)]
7090#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
    c: __m128h,
) -> __m128h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmsub: f16 = 0.0;
        if k & 1 != 0 {
            let extracta: f16 = simd_extract!(a, 0);
            let extractb: f16 = simd_extract!(b, 0);
            let extractc: f16 = simd_extract!(c, 0);
            fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmsub)
    }
}
7109
7110/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7111/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7112///
7113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
7114#[inline]
7115#[target_feature(enable = "avx512fp16,avx512vl")]
7116#[cfg_attr(test, assert_instr(vfmaddsub))]
7117#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { vfmaddsubph_128(a, b, c) }
}
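// Reference model of the alternating pattern implemented by `vfmaddsub` (illustrative,
// unfused): even-indexed lanes compute a*b - c, odd-indexed lanes compute a*b + c.
#[allow(dead_code)]
fn fmaddsub_ph_model(a: [f16; 8], b: [f16; 8], c: [f16; 8]) -> [f16; 8] {
    let mut r = [0.0; 8];
    for i in 0..8 {
        r[i] = if i % 2 == 0 {
            a[i] * b[i] - c[i]
        } else {
            a[i] * b[i] + c[i]
        };
    }
    r
}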
7121
7122/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7123/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7124/// (the element is copied from a when the corresponding mask bit is not set).
7125///
7126/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
7127#[inline]
7128#[target_feature(enable = "avx512fp16,avx512vl")]
7129#[cfg_attr(test, assert_instr(vfmaddsub))]
7130#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
}
7134
7135/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7136/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7137/// (the element is copied from c when the corresponding mask bit is not set).
7138///
7139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
7140#[inline]
7141#[target_feature(enable = "avx512fp16,avx512vl")]
7142#[cfg_attr(test, assert_instr(vfmaddsub))]
7143#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
}
7147
7148/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7149/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7150/// (the element is zeroed out when the corresponding mask bit is not set).
7151///
7152/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
7153#[inline]
7154#[target_feature(enable = "avx512fp16,avx512vl")]
7155#[cfg_attr(test, assert_instr(vfmaddsub))]
7156#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
}
7160
7161/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7162/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7163///
7164/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
7165#[inline]
7166#[target_feature(enable = "avx512fp16,avx512vl")]
7167#[cfg_attr(test, assert_instr(vfmaddsub))]
7168#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { vfmaddsubph_256(a, b, c) }
}
7172
7173/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7174/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7175/// (the element is copied from a when the corresponding mask bit is not set).
7176///
7177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
7178#[inline]
7179#[target_feature(enable = "avx512fp16,avx512vl")]
7180#[cfg_attr(test, assert_instr(vfmaddsub))]
7181#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
}
7185
7186/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7187/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7188/// (the element is copied from c when the corresponding mask bit is not set).
7189///
7190/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
7191#[inline]
7192#[target_feature(enable = "avx512fp16,avx512vl")]
7193#[cfg_attr(test, assert_instr(vfmaddsub))]
7194#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
}
7198
7199/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7200/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7201/// (the element is zeroed out when the corresponding mask bit is not set).
7202///
7203/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
7204#[inline]
7205#[target_feature(enable = "avx512fp16,avx512vl")]
7206#[cfg_attr(test, assert_instr(vfmaddsub))]
7207#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
}
7211
7212/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7213/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7214///
7215/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
7216#[inline]
7217#[target_feature(enable = "avx512fp16")]
7218#[cfg_attr(test, assert_instr(vfmaddsub))]
7219#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}
7223
7224/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7225/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7226/// (the element is copied from a when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
7229#[inline]
7230#[target_feature(enable = "avx512fp16")]
7231#[cfg_attr(test, assert_instr(vfmaddsub))]
7232#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
}
7236
7237/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7238/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7239/// (the element is copied from c when the corresponding mask bit is not set).
7240///
7241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
7242#[inline]
7243#[target_feature(enable = "avx512fp16")]
7244#[cfg_attr(test, assert_instr(vfmaddsub))]
7245#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
}
7249
7250/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7251/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7252/// (the element is zeroed out when the corresponding mask bit is not set).
7253///
7254/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
7255#[inline]
7256#[target_feature(enable = "avx512fp16")]
7257#[cfg_attr(test, assert_instr(vfmaddsub))]
7258#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
}
7262
7263/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7264/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7265///
7266/// Rounding is done according to the rounding parameter, which can be one of:
7267///
7268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7273///
7274/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
7275#[inline]
7276#[target_feature(enable = "avx512fp16")]
7277#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7278#[rustc_legacy_const_generics(3)]
7279#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubph_512(a, b, c, ROUNDING)
    }
}
7290
7291/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7292/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7293/// (the element is copied from a when the corresponding mask bit is not set).
7294///
7295/// Rounding is done according to the rounding parameter, which can be one of:
7296///
7297/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7298/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7299/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7300/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7301/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7302///
7303/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
7304#[inline]
7305#[target_feature(enable = "avx512fp16")]
7306#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7307#[rustc_legacy_const_generics(4)]
7308#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    k: __mmask32,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), a)
    }
}
7320
7321/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7322/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7323/// (the element is copied from c when the corresponding mask bit is not set).
7324///
7325/// Rounding is done according to the rounding parameter, which can be one of:
7326///
7327/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7328/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7329/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7330/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7331/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7332///
7333/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
7334#[inline]
7335#[target_feature(enable = "avx512fp16")]
7336#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7337#[rustc_legacy_const_generics(4)]
7338#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
    k: __mmask32,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), c)
    }
}
7350
7351/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7352/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7353/// (the element is zeroed out when the corresponding mask bit is not set).
7354///
7355/// Rounding is done according to the rounding parameter, which can be one of:
7356///
7357/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7358/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7359/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7360/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7361/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7362///
7363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
7364#[inline]
7365#[target_feature(enable = "avx512fp16")]
7366#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7367#[rustc_legacy_const_generics(4)]
7368#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
    k: __mmask32,
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
    }
}
7384
7385/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7386/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7387///
7388/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
7389#[inline]
7390#[target_feature(enable = "avx512fp16,avx512vl")]
7391#[cfg_attr(test, assert_instr(vfmsubadd))]
7392#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { vfmaddsubph_128(a, b, simd_neg(c)) }
}
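// Reference model for `vfmsubadd` (illustrative, unfused): the add/subtract pattern is the
// mirror of `vfmaddsub` (even lanes add c, odd lanes subtract c), which is why the body above
// reuses `vfmaddsubph_128` with a negated `c`.
#[allow(dead_code)]
fn fmsubadd_ph_model(a: [f16; 8], b: [f16; 8], c: [f16; 8]) -> [f16; 8] {
    let mut r = [0.0; 8];
    for i in 0..8 {
        r[i] = if i % 2 == 0 {
            a[i] * b[i] + c[i]
        } else {
            a[i] * b[i] - c[i]
        };
    }
    r
}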
7396
7397/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7398/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7399/// (the element is copied from a when the corresponding mask bit is not set).
7400///
7401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
7402#[inline]
7403#[target_feature(enable = "avx512fp16,avx512vl")]
7404#[cfg_attr(test, assert_instr(vfmsubadd))]
7405#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
}
7409
7410/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7411/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7412/// (the element is copied from c when the corresponding mask bit is not set).
7413///
7414/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
7415#[inline]
7416#[target_feature(enable = "avx512fp16,avx512vl")]
7417#[cfg_attr(test, assert_instr(vfmsubadd))]
7418#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
}
7422
7423/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7424/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7425/// (the element is zeroed out when the corresponding mask bit is not set).
7426///
7427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
7428#[inline]
7429#[target_feature(enable = "avx512fp16,avx512vl")]
7430#[cfg_attr(test, assert_instr(vfmsubadd))]
7431#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
}
7435
7436/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7437/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7438///
7439/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
7440#[inline]
7441#[target_feature(enable = "avx512fp16,avx512vl")]
7442#[cfg_attr(test, assert_instr(vfmsubadd))]
7443#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { vfmaddsubph_256(a, b, simd_neg(c)) }
}
7447
7448/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7449/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7450/// (the element is copied from a when the corresponding mask bit is not set).
7451///
7452/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
7453#[inline]
7454#[target_feature(enable = "avx512fp16,avx512vl")]
7455#[cfg_attr(test, assert_instr(vfmsubadd))]
7456#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
}
7460
7461/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7462/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7463/// (the element is copied from c when the corresponding mask bit is not set).
7464///
7465/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
7466#[inline]
7467#[target_feature(enable = "avx512fp16,avx512vl")]
7468#[cfg_attr(test, assert_instr(vfmsubadd))]
7469#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
}
7473
7474/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7475/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7476/// (the element is zeroed out when the corresponding mask bit is not set).
7477///
7478/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
7479#[inline]
7480#[target_feature(enable = "avx512fp16,avx512vl")]
7481#[cfg_attr(test, assert_instr(vfmsubadd))]
7482#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
}
7486
7487/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7488/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7489///
7490/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
7491#[inline]
7492#[target_feature(enable = "avx512fp16")]
7493#[cfg_attr(test, assert_instr(vfmsubadd))]
7494#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}
7498
7499/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7500/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7501/// (the element is copied from a when the corresponding mask bit is not set).
7502///
7503/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
7504#[inline]
7505#[target_feature(enable = "avx512fp16")]
7506#[cfg_attr(test, assert_instr(vfmsubadd))]
7507#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
}
7511
7512/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7513/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7514/// (the element is copied from c when the corresponding mask bit is not set).
7515///
7516/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
7517#[inline]
7518#[target_feature(enable = "avx512fp16")]
7519#[cfg_attr(test, assert_instr(vfmsubadd))]
7520#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
}
7524
7525/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7526/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7527/// (the element is zeroed out when the corresponding mask bit is not set).
7528///
7529/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
7530#[inline]
7531#[target_feature(enable = "avx512fp16")]
7532#[cfg_attr(test, assert_instr(vfmsubadd))]
7533#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7534pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
7536}
7537
7538/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst.
7540///
7541/// Rounding is done according to the rounding parameter, which can be one of:
7542///
7543/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7544/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7545/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7546/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7547/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7548///
7549/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
7550#[inline]
7551#[target_feature(enable = "avx512fp16")]
7552#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7553#[rustc_legacy_const_generics(3)]
7554#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7555pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
7556 a: __m512h,
7557 b: __m512h,
7558 c: __m512h,
7559) -> __m512h {
7560 unsafe {
7561 static_assert_rounding!(ROUNDING);
        vfmaddsubph_512(a, b, simd_neg(c), ROUNDING)
7563 }
7564}
7565
7566/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k
7568/// (the element is copied from a when the corresponding mask bit is not set).
7569///
7570/// Rounding is done according to the rounding parameter, which can be one of:
7571///
7572/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7573/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7574/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7575/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7576/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7577///
7578/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
7579#[inline]
7580#[target_feature(enable = "avx512fp16")]
7581#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7582#[rustc_legacy_const_generics(4)]
7583#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7584pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
7585 a: __m512h,
7586 k: __mmask32,
7587 b: __m512h,
7588 c: __m512h,
7589) -> __m512h {
7590 unsafe {
7591 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), a)
7593 }
7594}
7595
7596/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k
7598/// (the element is copied from c when the corresponding mask bit is not set).
7599///
7600/// Rounding is done according to the rounding parameter, which can be one of:
7601///
7602/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7603/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7604/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7605/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7607///
7608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
7609#[inline]
7610#[target_feature(enable = "avx512fp16")]
7611#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7612#[rustc_legacy_const_generics(4)]
7613#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7614pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
7615 a: __m512h,
7616 b: __m512h,
7617 c: __m512h,
7618 k: __mmask32,
7619) -> __m512h {
7620 unsafe {
7621 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), c)
7623 }
7624}
7625
7626/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
/// and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k
7628/// (the element is zeroed out when the corresponding mask bit is not set).
7629///
7630/// Rounding is done according to the rounding parameter, which can be one of:
7631///
7632/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7633/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7634/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7635/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7636/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7637///
7638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
7639#[inline]
7640#[target_feature(enable = "avx512fp16")]
7641#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7642#[rustc_legacy_const_generics(4)]
7643#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7644pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
7645 k: __mmask32,
7646 a: __m512h,
7647 b: __m512h,
7648 c: __m512h,
7649) -> __m512h {
7650 unsafe {
7651 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(
            k,
            _mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
            _mm512_setzero_ph(),
        )
7657 }
7658}
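
// Hedged sketch (illustrative only): picking an explicit rounding mode for the `_round` variant
// instead of inheriting `MXCSR.RC`. The constant shown is the usual
// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` combination listed in the doc comments above;
// the helper name is made up for the example and assumes an `avx512fp16`-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn fmsubadd_round_ph_sketch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}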
7659
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`.
7661/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7662///
7663/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
7664#[inline]
7665#[target_feature(enable = "avx512fp16,avx512vl")]
7666#[cfg_attr(test, assert_instr(vrcpph))]
7667#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7668pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
    _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
7670}
7671
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`
7673/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
7674/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7675///
7676/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
7677#[inline]
7678#[target_feature(enable = "avx512fp16,avx512vl")]
7679#[cfg_attr(test, assert_instr(vrcpph))]
7680#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7681pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7682 unsafe { vrcpph_128(a, src, k) }
7683}
7684
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`
7686/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7687/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7688///
7689/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
7690#[inline]
7691#[target_feature(enable = "avx512fp16,avx512vl")]
7692#[cfg_attr(test, assert_instr(vrcpph))]
7693#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7694pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
7696}
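
// Hedged sketch (illustrative only): contrasting the writemask and zeromask forms of the
// reciprocal approximation. The lane values follow from the doc comments above; the helper is
// hypothetical and assumes `avx512fp16` and `avx512vl` at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn rcp_ph_masking_sketch() -> (__m128h, __m128h) {
    let a = _mm_set1_ph(4.0);
    let src = _mm_set1_ph(9.0);
    // Lanes 0..=3: ~0.25; lanes 4..=7: copied from `src` (9.0).
    let merged = _mm_mask_rcp_ph(src, 0x0f, a);
    // Lanes 0..=3: ~0.25; lanes 4..=7: zeroed.
    let zeroed = _mm_maskz_rcp_ph(0x0f, a);
    (merged, zeroed)
}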
7697
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`.
7699/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7700///
7701/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
7702#[inline]
7703#[target_feature(enable = "avx512fp16,avx512vl")]
7704#[cfg_attr(test, assert_instr(vrcpph))]
7705#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7706pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
    _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
7708}
7709
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`
7711/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
7712/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7713///
7714/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
7715#[inline]
7716#[target_feature(enable = "avx512fp16,avx512vl")]
7717#[cfg_attr(test, assert_instr(vrcpph))]
7718#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7719pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
7720 unsafe { vrcpph_256(a, src, k) }
7721}
7722
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`
7724/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7725/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7726///
7727/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
7728#[inline]
7729#[target_feature(enable = "avx512fp16,avx512vl")]
7730#[cfg_attr(test, assert_instr(vrcpph))]
7731#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7732pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
7734}
7735
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`.
7737/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7738///
7739/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
7740#[inline]
7741#[target_feature(enable = "avx512fp16")]
7742#[cfg_attr(test, assert_instr(vrcpph))]
7743#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7744pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
    _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
7746}
7747
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`
7749/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
7750/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7751///
7752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
7753#[inline]
7754#[target_feature(enable = "avx512fp16")]
7755#[cfg_attr(test, assert_instr(vrcpph))]
7756#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7757pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
7758 unsafe { vrcpph_512(a, src, k) }
7759}
7760
/// Compute the approximate reciprocal of packed half-precision (16-bit) floating-point elements in `a` and store the results in `dst`
7762/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7763/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7764///
7765/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
7766#[inline]
7767#[target_feature(enable = "avx512fp16")]
7768#[cfg_attr(test, assert_instr(vrcpph))]
7769#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7770pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
7772}
7773
7774/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7775/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
7776/// upper elements of dst.
7777/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7778///
7779/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
7780#[inline]
7781#[target_feature(enable = "avx512fp16")]
7782#[cfg_attr(test, assert_instr(vrcpsh))]
7783#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7784pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
7786}
7787
7788/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7789/// store the result in the lower element of dst using writemask k (the element is copied from src when
7790/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7791/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7792///
7793/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
7794#[inline]
7795#[target_feature(enable = "avx512fp16")]
7796#[cfg_attr(test, assert_instr(vrcpsh))]
7797#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7798pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7799 unsafe { vrcpsh(a, b, src, k) }
7800}
7801
7802/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7803/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
7804/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7805/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7806///
7807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
7808#[inline]
7809#[target_feature(enable = "avx512fp16")]
7810#[cfg_attr(test, assert_instr(vrcpsh))]
7811#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7812pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), k, a, b)
7814}
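
// Hedged sketch (illustrative only): the scalar `_sh` forms only touch lane 0 - the result's
// lower element is approximately `1.0 / b[0]` and the upper seven lanes are copied from `a`.
// Hypothetical helper; assumes an `avx512fp16`-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn rcp_sh_sketch() -> __m128h {
    let a = _mm_set1_ph(7.0);
    let b = _mm_set_sh(2.0);
    // Lane 0: ~0.5; lanes 1..=7: 7.0 (copied from `a`).
    _mm_rcp_sh(a, b)
}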
7815
7816/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7817/// elements in a, and store the results in dst.
7818/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7819///
7820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
7821#[inline]
7822#[target_feature(enable = "avx512fp16,avx512vl")]
7823#[cfg_attr(test, assert_instr(vrsqrtph))]
7824#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7825pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
    _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
7827}
7828
7829/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7830/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7831/// the corresponding mask bit is not set).
7832/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7833///
7834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
7835#[inline]
7836#[target_feature(enable = "avx512fp16,avx512vl")]
7837#[cfg_attr(test, assert_instr(vrsqrtph))]
7838#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7839pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
7840 unsafe { vrsqrtph_128(a, src, k) }
7841}
7842
7843/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7844/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7845/// corresponding mask bit is not set).
7846/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7847///
7848/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
7849#[inline]
7850#[target_feature(enable = "avx512fp16,avx512vl")]
7851#[cfg_attr(test, assert_instr(vrsqrtph))]
7852#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7853pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
7855}
7856
7857/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7858/// elements in a, and store the results in dst.
7859/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7860///
7861/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
7862#[inline]
7863#[target_feature(enable = "avx512fp16,avx512vl")]
7864#[cfg_attr(test, assert_instr(vrsqrtph))]
7865#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7866pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
    _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a)
7868}
7869
7870/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7871/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7872/// the corresponding mask bit is not set).
7873/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7874///
7875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
7876#[inline]
7877#[target_feature(enable = "avx512fp16,avx512vl")]
7878#[cfg_attr(test, assert_instr(vrsqrtph))]
7879#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7880pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
7881 unsafe { vrsqrtph_256(a, src, k) }
7882}
7883
7884/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7885/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7886/// corresponding mask bit is not set).
7887/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7888///
7889/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
7890#[inline]
7891#[target_feature(enable = "avx512fp16,avx512vl")]
7892#[cfg_attr(test, assert_instr(vrsqrtph))]
7893#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7894pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a)
7896}
7897
7898/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7899/// elements in a, and store the results in dst.
7900/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7901///
7902/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
7903#[inline]
7904#[target_feature(enable = "avx512fp16")]
7905#[cfg_attr(test, assert_instr(vrsqrtph))]
7906#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7907pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
    _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a)
7909}
7910
7911/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7912/// elements in a, and store the results in dst using writemask k (elements are copied from src when
7913/// the corresponding mask bit is not set).
7914/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7915///
7916/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
7917#[inline]
7918#[target_feature(enable = "avx512fp16")]
7919#[cfg_attr(test, assert_instr(vrsqrtph))]
7920#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7921pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
7922 unsafe { vrsqrtph_512(a, src, k) }
7923}
7924
7925/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7926/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7927/// corresponding mask bit is not set).
7928/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7929///
7930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
7931#[inline]
7932#[target_feature(enable = "avx512fp16")]
7933#[cfg_attr(test, assert_instr(vrsqrtph))]
7934#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7935pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a)
7937}
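
// Hedged sketch (illustrative only): `rsqrt` yields an approximate `1/sqrt(x)` with relative
// error below 1.5*2^-12, which is often accurate enough for normalisation-style work and avoids
// a full square root followed by a division. Hypothetical helper; assumes an `avx512fp16`-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn rsqrt_ph_scale_sketch(squared_lengths: __m512h, v: __m512h) -> __m512h {
    // v * (1/sqrt(len^2)) is approximately v / len
    _mm512_mul_ph(v, _mm512_rsqrt_ph(squared_lengths))
}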
7938
7939/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7940/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a
7941/// to the upper elements of dst.
7942/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7943///
7944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
7945#[inline]
7946#[target_feature(enable = "avx512fp16")]
7947#[cfg_attr(test, assert_instr(vrsqrtsh))]
7948#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7949pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
7951}
7952
7953/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7954/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src
7955/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7956/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7957///
7958/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
7959#[inline]
7960#[target_feature(enable = "avx512fp16")]
7961#[cfg_attr(test, assert_instr(vrsqrtsh))]
7962#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7963pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7964 unsafe { vrsqrtsh(a, b, src, k) }
7965}
7966
7967/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
7968/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when
7969/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7970/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7971///
7972/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
7973#[inline]
7974#[target_feature(enable = "avx512fp16")]
7975#[cfg_attr(test, assert_instr(vrsqrtsh))]
7976#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7977pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
7979}
7980
7981/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7982/// results in dst.
7983///
7984/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
7985#[inline]
7986#[target_feature(enable = "avx512fp16,avx512vl")]
7987#[cfg_attr(test, assert_instr(vsqrtph))]
7988#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7989pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
7990 unsafe { simd_fsqrt(a) }
7991}
7992
7993/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
7994/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7995///
7996/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
7997#[inline]
7998#[target_feature(enable = "avx512fp16,avx512vl")]
7999#[cfg_attr(test, assert_instr(vsqrtph))]
8000#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8001pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) }
8003}
8004
8005/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8006/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8007///
8008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
8009#[inline]
8010#[target_feature(enable = "avx512fp16,avx512vl")]
8011#[cfg_attr(test, assert_instr(vsqrtph))]
8012#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8013pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) }
8015}
8016
8017/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8018/// results in dst.
8019///
8020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
8021#[inline]
8022#[target_feature(enable = "avx512fp16,avx512vl")]
8023#[cfg_attr(test, assert_instr(vsqrtph))]
8024#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8025pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
8026 unsafe { simd_fsqrt(a) }
8027}
8028
8029/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8030/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8031///
8032/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
8033#[inline]
8034#[target_feature(enable = "avx512fp16,avx512vl")]
8035#[cfg_attr(test, assert_instr(vsqrtph))]
8036#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8037pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) }
8039}
8040
8041/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8042/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8043///
8044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
8045#[inline]
8046#[target_feature(enable = "avx512fp16,avx512vl")]
8047#[cfg_attr(test, assert_instr(vsqrtph))]
8048#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8049pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) }
8051}
8052
8053/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8054/// results in dst.
8055///
8056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
8057#[inline]
8058#[target_feature(enable = "avx512fp16")]
8059#[cfg_attr(test, assert_instr(vsqrtph))]
8060#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8061pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
8062 unsafe { simd_fsqrt(a) }
8063}
8064
8065/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8066/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8067///
8068/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
8069#[inline]
8070#[target_feature(enable = "avx512fp16")]
8071#[cfg_attr(test, assert_instr(vsqrtph))]
8072#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8073pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) }
8075}
8076
8077/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8078/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8079///
8080/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
8081#[inline]
8082#[target_feature(enable = "avx512fp16")]
8083#[cfg_attr(test, assert_instr(vsqrtph))]
8084#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8085pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) }
8087}
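
// Hedged sketch (illustrative only): the zeromask form can be used to force lanes that would
// otherwise produce NaN (negative inputs) to zero; in real code the mask would come from a
// comparison against zero. Hypothetical helper; assumes an `avx512fp16`-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn sqrt_ph_guarded_sketch(a: __m512h, non_negative_lanes: __mmask32) -> __m512h {
    _mm512_maskz_sqrt_ph(non_negative_lanes, a)
}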
8088
8089/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8090/// results in dst.
8091/// Rounding is done according to the rounding parameter, which can be one of:
8092///
8093/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8094/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8095/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8096/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8097/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8098///
8099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
8100#[inline]
8101#[target_feature(enable = "avx512fp16")]
8102#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(1)]
8104#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8105pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
8106 unsafe {
8107 static_assert_rounding!(ROUNDING);
8108 vsqrtph_512(a, ROUNDING)
8109 }
8110}
8111
8112/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8113/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8114/// Rounding is done according to the rounding parameter, which can be one of:
8115///
8116/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8117/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8118/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8119/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8120/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8121///
8122/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
8123#[inline]
8124#[target_feature(enable = "avx512fp16")]
8125#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8126#[rustc_legacy_const_generics(3)]
8127#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8128pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
8129 src: __m512h,
8130 k: __mmask32,
8131 a: __m512h,
8132) -> __m512h {
8133 unsafe {
8134 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), src)
8136 }
8137}
8138
8139/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8140/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8141/// Rounding is done according to the rounding parameter, which can be one of:
8142///
8143/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8144/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8145/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8146/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8147/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8148///
8149/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
8150#[inline]
8151#[target_feature(enable = "avx512fp16")]
8152#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8153#[rustc_legacy_const_generics(2)]
8154#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8155pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
8156 unsafe {
8157 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_sqrt_round_ph::<ROUNDING>(a), _mm512_setzero_ph())
8159 }
8160}
8161
8162/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8163/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8164/// elements of dst.
8165///
8166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
8167#[inline]
8168#[target_feature(enable = "avx512fp16")]
8169#[cfg_attr(test, assert_instr(vsqrtsh))]
8170#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8171pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
8173}
8174
8175/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8176/// the result in the lower element of dst using writemask k (the element is copied from src when mask
8177/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8178///
8179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
8180#[inline]
8181#[target_feature(enable = "avx512fp16")]
8182#[cfg_attr(test, assert_instr(vsqrtsh))]
8183#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8184pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8185 _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8186}
8187
8188/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8189/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8190/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8191///
8192/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
8193#[inline]
8194#[target_feature(enable = "avx512fp16")]
8195#[cfg_attr(test, assert_instr(vsqrtsh))]
8196#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8197pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
8199}
8200
8201/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8202/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8203/// elements of dst.
8204/// Rounding is done according to the rounding parameter, which can be one of:
8205///
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
8213#[inline]
8214#[target_feature(enable = "avx512fp16")]
8215#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8216#[rustc_legacy_const_generics(2)]
8217#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8218pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
8219 static_assert_rounding!(ROUNDING);
    _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
8221}
8222
8223/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8224/// the result in the lower element of dst using writemask k (the element is copied from src when mask
8225/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8226/// Rounding is done according to the rounding parameter, which can be one of:
8227///
8228/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8229/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8230/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8231/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8232/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8233///
8234/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
8235#[inline]
8236#[target_feature(enable = "avx512fp16")]
8237#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8238#[rustc_legacy_const_generics(4)]
8239#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8240pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
8241 src: __m128h,
8242 k: __mmask8,
8243 a: __m128h,
8244 b: __m128h,
8245) -> __m128h {
8246 unsafe {
8247 static_assert_rounding!(ROUNDING);
8248 vsqrtsh(a, b, src, k, ROUNDING)
8249 }
8250}
8251
8252/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8253/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8254/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8255/// Rounding is done according to the rounding parameter, which can be one of:
8256///
8257/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8258/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8259/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8260/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8262///
8263/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
8264#[inline]
8265#[target_feature(enable = "avx512fp16")]
8266#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8267#[rustc_legacy_const_generics(3)]
8268#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8269pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
8270 k: __mmask8,
8271 a: __m128h,
8272 b: __m128h,
8273) -> __m128h {
8274 static_assert_rounding!(ROUNDING);
    _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
8276}
8277
8278/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8279/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8280/// value when inputs are NaN or signed-zero values.
8281///
8282/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
8283#[inline]
8284#[target_feature(enable = "avx512fp16,avx512vl")]
8285#[cfg_attr(test, assert_instr(vmaxph))]
8286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8287pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
8288 unsafe { vmaxph_128(a, b) }
8289}
8290
8291/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8292/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8293/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8294/// NaN or signed-zero values.
8295///
8296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
8297#[inline]
8298#[target_feature(enable = "avx512fp16,avx512vl")]
8299#[cfg_attr(test, assert_instr(vmaxph))]
8300#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8301pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) }
8303}
8304
8305/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8306/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8307/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8308/// NaN or signed-zero values.
8309///
8310/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
8311#[inline]
8312#[target_feature(enable = "avx512fp16,avx512vl")]
8313#[cfg_attr(test, assert_instr(vmaxph))]
8314#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8315pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) }
8317}
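
// Hedged sketch (illustrative only): like the legacy SSE/AVX max, `vmaxph` is not the IEEE 754
// maximum - per Intel's pseudocode `dst := (a > b) ? a : b`, so a NaN in `a` yields the value
// from `b`, while a NaN in `b` propagates to the result. Hypothetical helper; assumes
// `avx512fp16` and `avx512vl` at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
unsafe fn max_ph_nan_sketch() -> __m128h {
    let a = _mm_set1_ph(f16::NAN);
    let b = _mm_set1_ph(1.0);
    // Every lane is expected to hold 1.0 (the second operand), not NaN.
    _mm_max_ph(a, b)
}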
8318
8319/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8320/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8321/// value when inputs are NaN or signed-zero values.
8322///
8323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
8324#[inline]
8325#[target_feature(enable = "avx512fp16,avx512vl")]
8326#[cfg_attr(test, assert_instr(vmaxph))]
8327#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8328pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
8329 unsafe { vmaxph_256(a, b) }
8330}
8331
8332/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8333/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8334/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8335/// NaN or signed-zero values.
8336///
8337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
8338#[inline]
8339#[target_feature(enable = "avx512fp16,avx512vl")]
8340#[cfg_attr(test, assert_instr(vmaxph))]
8341#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8342pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) }
8344}
8345
8346/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8347/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8348/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8349/// NaN or signed-zero values.
8350///
8351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
8352#[inline]
8353#[target_feature(enable = "avx512fp16,avx512vl")]
8354#[cfg_attr(test, assert_instr(vmaxph))]
8355#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8356pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) }
8358}
8359
8360/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8361/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
8362/// value when inputs are NaN or signed-zero values.
8363///
8364/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
8365#[inline]
8366#[target_feature(enable = "avx512fp16")]
8367#[cfg_attr(test, assert_instr(vmaxph))]
8368#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8369pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
8370 _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
8371}
8372
8373/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8374/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8375/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8376/// NaN or signed-zero values.
8377///
8378/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
8379#[inline]
8380#[target_feature(enable = "avx512fp16")]
8381#[cfg_attr(test, assert_instr(vmaxph))]
8382#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8383pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) }
8385}
8386
8387/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8388/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8389/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8390/// NaN or signed-zero values.
8391///
8392/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
8393#[inline]
8394#[target_feature(enable = "avx512fp16")]
8395#[cfg_attr(test, assert_instr(vmaxph))]
8396#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8397pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) }
8399}
8400
8401/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8402/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8403/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8404/// NaN or signed-zero values.
8405///
8406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
8407#[inline]
8408#[target_feature(enable = "avx512fp16")]
8409#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8410#[rustc_legacy_const_generics(2)]
8411#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8412pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
8413 unsafe {
8414 static_assert_sae!(SAE);
8415 vmaxph_512(a, b, SAE)
8416 }
8417}
8418
8419/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8420/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8421/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8422/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8423///
8424/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
8425#[inline]
8426#[target_feature(enable = "avx512fp16")]
8427#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8428#[rustc_legacy_const_generics(4)]
8429#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8430pub fn _mm512_mask_max_round_ph<const SAE: i32>(
8431 src: __m512h,
8432 k: __mmask32,
8433 a: __m512h,
8434 b: __m512h,
8435) -> __m512h {
8436 unsafe {
8437 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), src)
8439 }
8440}
8441
8442/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8443/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8444/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8445/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8446///
8447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
8448#[inline]
8449#[target_feature(enable = "avx512fp16")]
8450#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8451#[rustc_legacy_const_generics(3)]
8452#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8453pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8454 unsafe {
8455 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_max_round_ph::<SAE>(a, b), _mm512_setzero_ph())
8457 }
8458}
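
// Hedged sketch (illustrative only): the `_round` max/min variants take a SAE ("suppress all
// exceptions") parameter rather than a rounding mode, since a compare never rounds; passing
// `_MM_FROUND_NO_EXC` keeps the comparison from updating exception flags. Hypothetical helper;
// assumes an `avx512fp16`-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
unsafe fn max_round_ph_sae_sketch(a: __m512h, b: __m512h) -> __m512h {
    _mm512_max_round_ph::<_MM_FROUND_NO_EXC>(a, b)
}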
8459
8460/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8461/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8462/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
8463/// when inputs are NaN or signed-zero values.
8464///
8465/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
8466#[inline]
8467#[target_feature(enable = "avx512fp16,avx512vl")]
8468#[cfg_attr(test, assert_instr(vmaxsh))]
8469#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8470pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b)
8472}
8473
8474/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8475/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
8476/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
8477/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8478///
8479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
8480#[inline]
8481#[target_feature(enable = "avx512fp16,avx512vl")]
8482#[cfg_attr(test, assert_instr(vmaxsh))]
8483#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8484pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8485 _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8486}
8487
8488/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8489/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8490/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8491/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8492///
8493/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
8494#[inline]
8495#[target_feature(enable = "avx512fp16,avx512vl")]
8496#[cfg_attr(test, assert_instr(vmaxsh))]
8497#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8498pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_max_sh(f16x8::ZERO.as_m128h(), k, a, b)
8500}
8501
8502/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8503/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8504/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8505/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8506///
8507/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
8508#[inline]
8509#[target_feature(enable = "avx512fp16,avx512vl")]
8510#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8511#[rustc_legacy_const_generics(2)]
8512#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8513pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8514 static_assert_sae!(SAE);
    _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
8516}
8517
8518/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8519/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
8520/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8521/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8522/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8523///
8524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
8525#[inline]
8526#[target_feature(enable = "avx512fp16,avx512vl")]
8527#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8528#[rustc_legacy_const_generics(4)]
8529#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8530pub fn _mm_mask_max_round_sh<const SAE: i32>(
8531 src: __m128h,
8532 k: __mmask8,
8533 a: __m128h,
8534 b: __m128h,
8535) -> __m128h {
8536 unsafe {
8537 static_assert_sae!(SAE);
8538 vmaxsh(a, b, src, k, SAE)
8539 }
8540}
8541
8542/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8543/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8544/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8545/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8546/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8547///
8548/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
8549#[inline]
8550#[target_feature(enable = "avx512fp16,avx512vl")]
8551#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8552#[rustc_legacy_const_generics(3)]
8553#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8554pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8555 static_assert_sae!(SAE);
    _mm_mask_max_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
8557}
8558
8559/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8560/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8561/// when inputs are NaN or signed-zero values.
8562///
8563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
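///
/// A minimal usage sketch (illustrative only; the `demo` wrapper is hypothetical and a nightly
/// toolchain with the `f16` and `stdarch_x86_avx512_f16` features is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set1_ph(2.0);
///     // Every lane of the result is min(1.0, 2.0) == 1.0.
///     _mm_min_ph(a, b)
/// }
/// ```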
8564#[inline]
8565#[target_feature(enable = "avx512fp16,avx512vl")]
8566#[cfg_attr(test, assert_instr(vminph))]
8567#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8568pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
8569 unsafe { vminph_128(a, b) }
8570}
8571
8572/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8573/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8574/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8575/// NaN or signed-zero values.
8576///
8577/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
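///
/// A minimal sketch of the writemask behaviour (illustrative only; `demo` is a hypothetical
/// wrapper and nightly `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let src = _mm_set1_ph(9.0);
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set1_ph(2.0);
///     // Only lanes 0 and 1 receive min(1.0, 2.0) == 1.0; lanes 2..=7 are copied from `src`.
///     _mm_mask_min_ph(src, 0b0000_0011, a, b)
/// }
/// ```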
8578#[inline]
8579#[target_feature(enable = "avx512fp16,avx512vl")]
8580#[cfg_attr(test, assert_instr(vminph))]
8581#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8582pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) }
8584}
8585
8586/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8587/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8588/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8589/// NaN or signed-zero values.
8590///
8591/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
8592#[inline]
8593#[target_feature(enable = "avx512fp16,avx512vl")]
8594#[cfg_attr(test, assert_instr(vminph))]
8595#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8596pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) }
8598}
8599
8600/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8601/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8602/// when inputs are NaN or signed-zero values.
8603///
8604/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
8605#[inline]
8606#[target_feature(enable = "avx512fp16,avx512vl")]
8607#[cfg_attr(test, assert_instr(vminph))]
8608#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8609pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
8610 unsafe { vminph_256(a, b) }
8611}
8612
8613/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8614/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8615/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8616/// NaN or signed-zero values.
8617///
8618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
8619#[inline]
8620#[target_feature(enable = "avx512fp16,avx512vl")]
8621#[cfg_attr(test, assert_instr(vminph))]
8622#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8623pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) }
8625}
8626
8627/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8628/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8629/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8630/// NaN or signed-zero values.
8631///
8632/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
8633#[inline]
8634#[target_feature(enable = "avx512fp16,avx512vl")]
8635#[cfg_attr(test, assert_instr(vminph))]
8636#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8637pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) }
8639}
8640
8641/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8642/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
8643/// when inputs are NaN or signed-zero values.
8644///
8645/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
8646#[inline]
8647#[target_feature(enable = "avx512fp16")]
8648#[cfg_attr(test, assert_instr(vminph))]
8649#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8650pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
8651 _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
8652}
8653
8654/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8655/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8656/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8657/// NaN or signed-zero values.
8658///
8659/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
8660#[inline]
8661#[target_feature(enable = "avx512fp16")]
8662#[cfg_attr(test, assert_instr(vminph))]
8663#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8664pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) }
8666}
8667
8668/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8669/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8670/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8671/// NaN or signed-zero values.
8672///
8673/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
8674#[inline]
8675#[target_feature(enable = "avx512fp16")]
8676#[cfg_attr(test, assert_instr(vminph))]
8677#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8678pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) }
8680}
8681
8682/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8683/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not
8684/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8685///
8686/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
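///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical wrapper, and nightly
/// `f16`/`stdarch_x86_avx512_f16` support plus an AVX-512 FP16 CPU are assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m512h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm512_set1_ph(4.0);
///     let b = _mm512_set1_ph(-1.0);
///     // Every lane becomes min(4.0, -1.0) == -1.0, with floating-point exceptions suppressed.
///     _mm512_min_round_ph::<_MM_FROUND_NO_EXC>(a, b)
/// }
/// ```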
8687#[inline]
8688#[target_feature(enable = "avx512fp16")]
8689#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8690#[rustc_legacy_const_generics(2)]
8691#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8692pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
8693 unsafe {
8694 static_assert_sae!(SAE);
8695 vminph_512(a, b, SAE)
8696 }
8697}
8698
8699/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8700/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8701/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8702/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8703///
8704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
8705#[inline]
8706#[target_feature(enable = "avx512fp16")]
8707#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8708#[rustc_legacy_const_generics(4)]
8709#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8710pub fn _mm512_mask_min_round_ph<const SAE: i32>(
8711 src: __m512h,
8712 k: __mmask32,
8713 a: __m512h,
8714 b: __m512h,
8715) -> __m512h {
8716 unsafe {
8717 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), src)
8719 }
8720}
8721
8722/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8723/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8724/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8725/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8726///
8727/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
8728#[inline]
8729#[target_feature(enable = "avx512fp16")]
8730#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8731#[rustc_legacy_const_generics(3)]
8732#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8733pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8734 unsafe {
8735 static_assert_sae!(SAE);
        simd_select_bitmask(k, _mm512_min_round_ph::<SAE>(a, b), _mm512_setzero_ph())
8737 }
8738}
8739
8740/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8741/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8742/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when
8743/// inputs are NaN or signed-zero values.
8744///
8745/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
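///
/// A minimal usage sketch of the scalar form (illustrative only; `demo` is a hypothetical wrapper
/// and nightly `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm_set1_ph(5.0);
///     let b = _mm_set_sh(2.0);
///     // The lower lane becomes min(5.0, 2.0) == 2.0; the upper 7 lanes are copied from `a` (5.0).
///     _mm_min_sh(a, b)
/// }
/// ```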
8746#[inline]
8747#[target_feature(enable = "avx512fp16,avx512vl")]
8748#[cfg_attr(test, assert_instr(vminsh))]
8749#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8750pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b)
8752}
8753
8754/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8755/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
8756/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
8757/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8758///
8759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
8760#[inline]
8761#[target_feature(enable = "avx512fp16,avx512vl")]
8762#[cfg_attr(test, assert_instr(vminsh))]
8763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8764pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8765 _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
8766}
8767
8768/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8769/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8770/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8771/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8772///
8773/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
8774#[inline]
8775#[target_feature(enable = "avx512fp16,avx512vl")]
8776#[cfg_attr(test, assert_instr(vminsh))]
8777#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8778pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_min_sh(f16x8::ZERO.as_m128h(), k, a, b)
8780}
8781
8782/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8783/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8784/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8785/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8786///
8787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
8788#[inline]
8789#[target_feature(enable = "avx512fp16,avx512vl")]
8790#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8791#[rustc_legacy_const_generics(2)]
8792#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8793pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8794 static_assert_sae!(SAE);
    _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
8796}
8797
8798/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8799/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
8800/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8801/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8802/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8803///
8804/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
8805#[inline]
8806#[target_feature(enable = "avx512fp16,avx512vl")]
8807#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8808#[rustc_legacy_const_generics(4)]
8809#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8810pub fn _mm_mask_min_round_sh<const SAE: i32>(
8811 src: __m128h,
8812 k: __mmask8,
8813 a: __m128h,
8814 b: __m128h,
8815) -> __m128h {
8816 unsafe {
8817 static_assert_sae!(SAE);
8818 vminsh(a, b, src, k, SAE)
8819 }
8820}
8821
8822/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8823/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8824/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8825/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8826/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8827///
8828/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
8829#[inline]
8830#[target_feature(enable = "avx512fp16,avx512vl")]
8831#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8832#[rustc_legacy_const_generics(3)]
8833#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8834pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8835 static_assert_sae!(SAE);
    _mm_mask_min_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
8837}
8838
8839/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8840/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8841/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8842///
8843/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
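///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical wrapper and nightly
/// `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm_set1_ph(8.0);
///     // floor(log2(8.0)) == 3.0, so every lane of the result is 3.0.
///     _mm_getexp_ph(a)
/// }
/// ```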
8844#[inline]
8845#[target_feature(enable = "avx512fp16,avx512vl")]
8846#[cfg_attr(test, assert_instr(vgetexpph))]
8847#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8848pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
    _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
8850}
8851
8852/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8853/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8854/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8855/// `floor(log2(x))` for each element.
8856///
8857/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
8858#[inline]
8859#[target_feature(enable = "avx512fp16,avx512vl")]
8860#[cfg_attr(test, assert_instr(vgetexpph))]
8861#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8862pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
8863 unsafe { vgetexpph_128(a, src, k) }
8864}
8865
8866/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8867/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8868/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8869/// `floor(log2(x))` for each element.
8870///
8871/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
8872#[inline]
8873#[target_feature(enable = "avx512fp16,avx512vl")]
8874#[cfg_attr(test, assert_instr(vgetexpph))]
8875#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8876pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
    _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
8878}
8879
8880/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8881/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8882/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8883///
8884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
8885#[inline]
8886#[target_feature(enable = "avx512fp16,avx512vl")]
8887#[cfg_attr(test, assert_instr(vgetexpph))]
8888#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8889pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
    _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
8891}
8892
8893/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8894/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8895/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8896/// `floor(log2(x))` for each element.
8897///
8898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
8899#[inline]
8900#[target_feature(enable = "avx512fp16,avx512vl")]
8901#[cfg_attr(test, assert_instr(vgetexpph))]
8902#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8903pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8904 unsafe { vgetexpph_256(a, src, k) }
8905}
8906
8907/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8908/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8909/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8910/// `floor(log2(x))` for each element.
8911///
8912/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
8913#[inline]
8914#[target_feature(enable = "avx512fp16,avx512vl")]
8915#[cfg_attr(test, assert_instr(vgetexpph))]
8916#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8917pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
    _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
8919}
8920
8921/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8922/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8923/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8924///
8925/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
8926#[inline]
8927#[target_feature(enable = "avx512fp16")]
8928#[cfg_attr(test, assert_instr(vgetexpph))]
8929#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8930pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
    _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
8932}
8933
8934/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8935/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8936/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8937/// `floor(log2(x))` for each element.
8938///
8939/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
8940#[inline]
8941#[target_feature(enable = "avx512fp16")]
8942#[cfg_attr(test, assert_instr(vgetexpph))]
8943#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8944pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8945 _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
8946}
8947
8948/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8949/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8950/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8951/// `floor(log2(x))` for each element.
8952///
8953/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
8954#[inline]
8955#[target_feature(enable = "avx512fp16")]
8956#[cfg_attr(test, assert_instr(vgetexpph))]
8957#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8958pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
    _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
8960}
8961
8962/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8963/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8964/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
8965/// by passing _MM_FROUND_NO_EXC in the sae parameter
8966///
8967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
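///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical wrapper and nightly
/// `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m512h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm512_set1_ph(0.5);
///     // floor(log2(0.5)) == -1.0 in every lane, with exceptions suppressed.
///     _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```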
8968#[inline]
8969#[target_feature(enable = "avx512fp16")]
8970#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
8971#[rustc_legacy_const_generics(1)]
8972#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8973pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
8974 static_assert_sae!(SAE);
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
8976}
8977
8978/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8979/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
8980/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
8981/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
8982///
8983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
8984#[inline]
8985#[target_feature(enable = "avx512fp16")]
8986#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
8987#[rustc_legacy_const_generics(3)]
8988#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8989pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
8990 src: __m512h,
8991 k: __mmask32,
8992 a: __m512h,
8993) -> __m512h {
8994 unsafe {
8995 static_assert_sae!(SAE);
8996 vgetexpph_512(a, src, k, SAE)
8997 }
8998}
8999
9000/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9001/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
9002/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
9003/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9004///
9005/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
9006#[inline]
9007#[target_feature(enable = "avx512fp16")]
9008#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
9009#[rustc_legacy_const_generics(2)]
9010#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9011pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
9012 static_assert_sae!(SAE);
    _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
9014}
9015
9016/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9017/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9018/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9019/// calculates `floor(log2(x))` for the lower element.
9020///
9021/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
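///
/// A minimal usage sketch of the scalar form (illustrative only; `demo` is a hypothetical wrapper
/// and nightly `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set_sh(8.0);
///     // The lower lane becomes floor(log2(8.0)) == 3.0; the upper 7 lanes are copied from `a`.
///     _mm_getexp_sh(a, b)
/// }
/// ```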
9022#[inline]
9023#[target_feature(enable = "avx512fp16")]
9024#[cfg_attr(test, assert_instr(vgetexpsh))]
9025#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9026pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
9028}
9029
9030/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9031/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9032/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9033/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9034/// for the lower element.
9035///
9036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
9037#[inline]
9038#[target_feature(enable = "avx512fp16")]
9039#[cfg_attr(test, assert_instr(vgetexpsh))]
9040#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9041pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9042 _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9043}
9044
9045/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9046/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9047/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9048/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9049/// lower element.
9050///
9051/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
9052#[inline]
9053#[target_feature(enable = "avx512fp16")]
9054#[cfg_attr(test, assert_instr(vgetexpsh))]
9055#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9056pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), k, a, b)
9058}
9059
9060/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9061/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9062/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9063/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9064/// in the sae parameter
9065///
9066/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
9067#[inline]
9068#[target_feature(enable = "avx512fp16")]
9069#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9070#[rustc_legacy_const_generics(2)]
9071#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9072pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
9073 static_assert_sae!(SAE);
    _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
9075}
9076
9077/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9078/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9079/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9080/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9081/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9082///
9083/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
9084#[inline]
9085#[target_feature(enable = "avx512fp16")]
9086#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9087#[rustc_legacy_const_generics(4)]
9088#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9089pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
9090 src: __m128h,
9091 k: __mmask8,
9092 a: __m128h,
9093 b: __m128h,
9094) -> __m128h {
9095 unsafe {
9096 static_assert_sae!(SAE);
9097 vgetexpsh(a, b, src, k, SAE)
9098 }
9099}
9100
9101/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9102/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9103/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9104/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9105/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9106///
9107/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
9108#[inline]
9109#[target_feature(enable = "avx512fp16")]
9110#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9111#[rustc_legacy_const_generics(3)]
9112#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9113pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9114 static_assert_sae!(SAE);
    _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
9116}
9117
9118/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9119/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9120/// on the interval range defined by norm and the sign depends on sign and the source sign.
9121///
9122/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9123///
9124/// _MM_MANT_NORM_1_2 // interval [1, 2)
9125/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9126/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9127/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9128///
9129/// The sign is determined by sc which can take the following values:
9130///
9131/// _MM_MANT_SIGN_src // sign = sign(src)
9132/// _MM_MANT_SIGN_zero // sign = 0
9133/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9134///
9135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
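///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical wrapper, the `_MM_MANT_*`
/// constants are assumed to be the ones exported alongside the other AVX-512 `getmant`
/// intrinsics, and nightly `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm_set1_ph(-12.0);
///     // |-12.0| == 1.5 * 2^3, so with the [1, 2) interval and the source sign
///     // every lane of the result is -1.5.
///     _mm_getmant_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a)
/// }
/// ```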
9136#[inline]
9137#[target_feature(enable = "avx512fp16,avx512vl")]
9138#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9139#[rustc_legacy_const_generics(1, 2)]
9140#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9141pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9142 a: __m128h,
9143) -> __m128h {
9144 static_assert_uimm_bits!(NORM, 4);
9145 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_undefined_ph(), 0xff, a)
9147}
9148
9149/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9150/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9151/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9152/// by norm and the sign depends on sign and the source sign.
9153///
9154/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9155///
9156/// _MM_MANT_NORM_1_2 // interval [1, 2)
9157/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9158/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9159/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9160///
9161/// The sign is determined by sc which can take the following values:
9162///
9163/// _MM_MANT_SIGN_src // sign = sign(src)
9164/// _MM_MANT_SIGN_zero // sign = 0
9165/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9166///
9167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
9168#[inline]
9169#[target_feature(enable = "avx512fp16,avx512vl")]
9170#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9171#[rustc_legacy_const_generics(3, 4)]
9172#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9173pub fn _mm_mask_getmant_ph<
9174 const NORM: _MM_MANTISSA_NORM_ENUM,
9175 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9176>(
9177 src: __m128h,
9178 k: __mmask8,
9179 a: __m128h,
9180) -> __m128h {
9181 unsafe {
9182 static_assert_uimm_bits!(NORM, 4);
9183 static_assert_uimm_bits!(SIGN, 2);
9184 vgetmantph_128(a, (SIGN << 2) | NORM, src, k)
9185 }
9186}
9187
9188/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9189/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9190/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9191/// by norm and the sign depends on sign and the source sign.
9192///
9193/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9194///
9195/// _MM_MANT_NORM_1_2 // interval [1, 2)
9196/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9197/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9198/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9199///
9200/// The sign is determined by sc which can take the following values:
9201///
9202/// _MM_MANT_SIGN_src // sign = sign(src)
9203/// _MM_MANT_SIGN_zero // sign = 0
9204/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9205///
9206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
9207#[inline]
9208#[target_feature(enable = "avx512fp16,avx512vl")]
9209#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9210#[rustc_legacy_const_generics(2, 3)]
9211#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9212pub fn _mm_maskz_getmant_ph<
9213 const NORM: _MM_MANTISSA_NORM_ENUM,
9214 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9215>(
9216 k: __mmask8,
9217 a: __m128h,
9218) -> __m128h {
9219 static_assert_uimm_bits!(NORM, 4);
9220 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_ph::<NORM, SIGN>(_mm_setzero_ph(), k, a)
9222}
9223
9224/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9225/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9226/// on the interval range defined by norm and the sign depends on sign and the source sign.
9227///
9228/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9229///
9230/// _MM_MANT_NORM_1_2 // interval [1, 2)
9231/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9232/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9233/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9234///
9235/// The sign is determined by sc which can take the following values:
9236///
9237/// _MM_MANT_SIGN_src // sign = sign(src)
9238/// _MM_MANT_SIGN_zero // sign = 0
9239/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9240///
9241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
9242#[inline]
9243#[target_feature(enable = "avx512fp16,avx512vl")]
9244#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9245#[rustc_legacy_const_generics(1, 2)]
9246#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9247pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9248 a: __m256h,
9249) -> __m256h {
9250 static_assert_uimm_bits!(NORM, 4);
9251 static_assert_uimm_bits!(SIGN, 2);
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_undefined_ph(), 0xffff, a)
9253}
9254
9255/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9256/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9257/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9258/// by norm and the sign depends on sign and the source sign.
9259///
9260/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9261///
9262/// _MM_MANT_NORM_1_2 // interval [1, 2)
9263/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9264/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9265/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9266///
9267/// The sign is determined by sc which can take the following values:
9268///
9269/// _MM_MANT_SIGN_src // sign = sign(src)
9270/// _MM_MANT_SIGN_zero // sign = 0
9271/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9272///
9273/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
9274#[inline]
9275#[target_feature(enable = "avx512fp16,avx512vl")]
9276#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9277#[rustc_legacy_const_generics(3, 4)]
9278#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9279pub fn _mm256_mask_getmant_ph<
9280 const NORM: _MM_MANTISSA_NORM_ENUM,
9281 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9282>(
9283 src: __m256h,
9284 k: __mmask16,
9285 a: __m256h,
9286) -> __m256h {
9287 unsafe {
9288 static_assert_uimm_bits!(NORM, 4);
9289 static_assert_uimm_bits!(SIGN, 2);
9290 vgetmantph_256(a, (SIGN << 2) | NORM, src, k)
9291 }
9292}
9293
9294/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9295/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9296/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9297/// by norm and the sign depends on sign and the source sign.
9298///
9299/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9300///
9301/// _MM_MANT_NORM_1_2 // interval [1, 2)
9302/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9303/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9304/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9305///
9306/// The sign is determined by sc which can take the following values:
9307///
9308/// _MM_MANT_SIGN_src // sign = sign(src)
9309/// _MM_MANT_SIGN_zero // sign = 0
9310/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9311///
9312/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
9313#[inline]
9314#[target_feature(enable = "avx512fp16,avx512vl")]
9315#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9316#[rustc_legacy_const_generics(2, 3)]
9317#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9318pub fn _mm256_maskz_getmant_ph<
9319 const NORM: _MM_MANTISSA_NORM_ENUM,
9320 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9321>(
9322 k: __mmask16,
9323 a: __m256h,
9324) -> __m256h {
9325 static_assert_uimm_bits!(NORM, 4);
9326 static_assert_uimm_bits!(SIGN, 2);
    _mm256_mask_getmant_ph::<NORM, SIGN>(_mm256_setzero_ph(), k, a)
9328}
9329
9330/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9331/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9332/// on the interval range defined by norm and the sign depends on sign and the source sign.
9333///
9334/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9335///
9336/// _MM_MANT_NORM_1_2 // interval [1, 2)
9337/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9338/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9339/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9340///
9341/// The sign is determined by sc which can take the following values:
9342///
9343/// _MM_MANT_SIGN_src // sign = sign(src)
9344/// _MM_MANT_SIGN_zero // sign = 0
9345/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9346///
9347/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
9348#[inline]
9349#[target_feature(enable = "avx512fp16")]
9350#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9351#[rustc_legacy_const_generics(1, 2)]
9352#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9353pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9354 a: __m512h,
9355) -> __m512h {
9356 static_assert_uimm_bits!(NORM, 4);
9357 static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_undefined_ph(), 0xffffffff, a)
9359}
9360
9361/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9362/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9363/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9364/// by norm and the sign depends on sign and the source sign.
9365///
9366/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9367///
9368/// _MM_MANT_NORM_1_2 // interval [1, 2)
9369/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9370/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9371/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9372///
9373/// The sign is determined by sc which can take the following values:
9374///
9375/// _MM_MANT_SIGN_src // sign = sign(src)
9376/// _MM_MANT_SIGN_zero // sign = 0
9377/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9378///
9379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
9380#[inline]
9381#[target_feature(enable = "avx512fp16")]
9382#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9383#[rustc_legacy_const_generics(3, 4)]
9384#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9385pub fn _mm512_mask_getmant_ph<
9386 const NORM: _MM_MANTISSA_NORM_ENUM,
9387 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9388>(
9389 src: __m512h,
9390 k: __mmask32,
9391 a: __m512h,
9392) -> __m512h {
9393 static_assert_uimm_bits!(NORM, 4);
9394 static_assert_uimm_bits!(SIGN, 2);
9395 _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
9396}
9397
9398/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9399/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9400/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9401/// by norm and the sign depends on sign and the source sign.
9402///
9403/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9404///
9405/// _MM_MANT_NORM_1_2 // interval [1, 2)
9406/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9407/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9408/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9409///
9410/// The sign is determined by sc which can take the following values:
9411///
9412/// _MM_MANT_SIGN_src // sign = sign(src)
9413/// _MM_MANT_SIGN_zero // sign = 0
9414/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9415///
9416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
9417#[inline]
9418#[target_feature(enable = "avx512fp16")]
9419#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9420#[rustc_legacy_const_generics(2, 3)]
9421#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9422pub fn _mm512_maskz_getmant_ph<
9423 const NORM: _MM_MANTISSA_NORM_ENUM,
9424 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9425>(
9426 k: __mmask32,
9427 a: __m512h,
9428) -> __m512h {
9429 static_assert_uimm_bits!(NORM, 4);
9430 static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_ph::<NORM, SIGN>(_mm512_setzero_ph(), k, a)
9432}
9433
9434/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9435/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9436/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9437/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9438///
9439/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9440///
9441/// _MM_MANT_NORM_1_2 // interval [1, 2)
9442/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9443/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9444/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9445///
9446/// The sign is determined by sc which can take the following values:
9447///
9448/// _MM_MANT_SIGN_src // sign = sign(src)
9449/// _MM_MANT_SIGN_zero // sign = 0
9450/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9451///
9454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
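///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical wrapper, the `_MM_MANT_*`
/// constant names are assumed to match the other AVX-512 `getmant` intrinsics, and nightly
/// `f16`/`stdarch_x86_avx512_f16` support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m512h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm512_set1_ph(24.0);
///     // 24.0 == 1.5 * 2^4, so every lane becomes 1.5, with exceptions suppressed.
///     _mm512_getmant_round_ph::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a)
/// }
/// ```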
9455#[inline]
9456#[target_feature(enable = "avx512fp16")]
9457#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9458#[rustc_legacy_const_generics(1, 2, 3)]
9459#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9460pub fn _mm512_getmant_round_ph<
9461 const NORM: _MM_MANTISSA_NORM_ENUM,
9462 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9463 const SAE: i32,
9464>(
9465 a: __m512h,
9466) -> __m512h {
9467 static_assert_uimm_bits!(NORM, 4);
9468 static_assert_uimm_bits!(SIGN, 2);
9469 static_assert_sae!(SAE);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
9471}
9472
9473/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9474/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9475/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9476/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9477/// in the sae parameter
9478///
9479/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9480///
9481/// _MM_MANT_NORM_1_2 // interval [1, 2)
9482/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9483/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9484/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9485///
9486/// The sign is determined by sc which can take the following values:
9487///
9488/// _MM_MANT_SIGN_src // sign = sign(src)
9489/// _MM_MANT_SIGN_zero // sign = 0
9490/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9491///
9494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
9495#[inline]
9496#[target_feature(enable = "avx512fp16")]
9497#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9498#[rustc_legacy_const_generics(3, 4, 5)]
9499#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9500pub fn _mm512_mask_getmant_round_ph<
9501 const NORM: _MM_MANTISSA_NORM_ENUM,
9502 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9503 const SAE: i32,
9504>(
9505 src: __m512h,
9506 k: __mmask32,
9507 a: __m512h,
9508) -> __m512h {
9509 unsafe {
9510 static_assert_uimm_bits!(NORM, 4);
9511 static_assert_uimm_bits!(SIGN, 2);
9512 static_assert_sae!(SAE);
9513 vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE)
9514 }
9515}
9516
9517/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9518/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9519/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9520/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9521/// in the sae parameter
9522///
9523/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9524///
9525/// _MM_MANT_NORM_1_2 // interval [1, 2)
9526/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9527/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9528/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9529///
9530/// The sign is determined by sc which can take the following values:
9531///
9532/// _MM_MANT_SIGN_src // sign = sign(src)
9533/// _MM_MANT_SIGN_zero // sign = 0
9534/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9535///
9538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
9539#[inline]
9540#[target_feature(enable = "avx512fp16")]
9541#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9542#[rustc_legacy_const_generics(2, 3, 4)]
9543#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9544pub fn _mm512_maskz_getmant_round_ph<
9545 const NORM: _MM_MANTISSA_NORM_ENUM,
9546 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9547 const SAE: i32,
9548>(
9549 k: __mmask32,
9550 a: __m512h,
9551) -> __m512h {
9552 static_assert_uimm_bits!(NORM, 4);
9553 static_assert_uimm_bits!(SIGN, 2);
9554 static_assert_sae!(SAE);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(_mm512_setzero_ph(), k, a)
9556}
9557
9558/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9559/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9560/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9561/// on the interval range defined by norm and the sign depends on sign and the source sign.
9562///
9563/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9564///
9565/// _MM_MANT_NORM_1_2 // interval [1, 2)
9566/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9567/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9568/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9569///
9570/// The sign is determined by sc which can take the following values:
9571///
9572/// _MM_MANT_SIGN_src // sign = sign(src)
9573/// _MM_MANT_SIGN_zero // sign = 0
9574/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9575///
9576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
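///
/// A minimal usage sketch of the scalar form (illustrative only; `demo` is a hypothetical wrapper,
/// the `_MM_MANT_*` constant names are assumed, and nightly `f16`/`stdarch_x86_avx512_f16`
/// support is assumed):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     // hypothetical wrapper used only for this illustration
///     let a = _mm_set1_ph(1.0);
///     let b = _mm_set_sh(6.0);
///     // 6.0 == 1.5 * 2^2, so the lower lane becomes 1.5 and the upper 7 lanes come from `a`.
///     _mm_getmant_sh::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b)
/// }
/// ```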
9577#[inline]
9578#[target_feature(enable = "avx512fp16")]
9579#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9580#[rustc_legacy_const_generics(2, 3)]
9581#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9582pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9583 a: __m128h,
9584 b: __m128h,
9585) -> __m128h {
9586 static_assert_uimm_bits!(NORM, 4);
9587 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_sh::<NORM, SIGN>(f16x8::ZERO.as_m128h(), 0xff, a, b)
9589}
9590
9591/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9592/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9593/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9594/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9595/// the source sign.
9596///
9597/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9598///
9599/// _MM_MANT_NORM_1_2 // interval [1, 2)
9600/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9601/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9602/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9603///
9604/// The sign is determined by sc which can take the following values:
9605///
9606/// _MM_MANT_SIGN_src // sign = sign(src)
9607/// _MM_MANT_SIGN_zero // sign = 0
9608/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9609///
9610/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
9611#[inline]
9612#[target_feature(enable = "avx512fp16")]
9613#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9614#[rustc_legacy_const_generics(4, 5)]
9615#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9616pub fn _mm_mask_getmant_sh<
9617 const NORM: _MM_MANTISSA_NORM_ENUM,
9618 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9619>(
9620 src: __m128h,
9621 k: __mmask8,
9622 a: __m128h,
9623 b: __m128h,
9624) -> __m128h {
9625 static_assert_uimm_bits!(NORM, 4);
9626 static_assert_uimm_bits!(SIGN, 2);
9627 _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9628}
9629
9630/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9631/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9632/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9633/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9634/// the source sign.
9635///
9636/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9637///
9638/// _MM_MANT_NORM_1_2 // interval [1, 2)
9639/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9640/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9641/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9642///
9643/// The sign is determined by sc which can take the following values:
9644///
9645/// _MM_MANT_SIGN_src // sign = sign(src)
9646/// _MM_MANT_SIGN_zero // sign = 0
9647/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9648///
9649/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
9650#[inline]
9651#[target_feature(enable = "avx512fp16")]
9652#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9653#[rustc_legacy_const_generics(3, 4)]
9654#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9655pub fn _mm_maskz_getmant_sh<
9656 const NORM: _MM_MANTISSA_NORM_ENUM,
9657 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9658>(
9659 k: __mmask8,
9660 a: __m128h,
9661 b: __m128h,
9662) -> __m128h {
9663 static_assert_uimm_bits!(NORM, 4);
9664 static_assert_uimm_bits!(SIGN, 2);
    _mm_mask_getmant_sh::<NORM, SIGN>(f16x8::ZERO.as_m128h(), k, a, b)
9666}
9667
/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
9669/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9670/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
/// on the interval range defined by norm and the sign depends on sign and the source sign.
9673///
9674/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9675///
9676/// _MM_MANT_NORM_1_2 // interval [1, 2)
9677/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9678/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9679/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9680///
9681/// The sign is determined by sc which can take the following values:
9682///
9683/// _MM_MANT_SIGN_src // sign = sign(src)
9684/// _MM_MANT_SIGN_zero // sign = 0
9685/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9686///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9688///
9689/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
9690#[inline]
9691#[target_feature(enable = "avx512fp16")]
9692#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
9693#[rustc_legacy_const_generics(2, 3, 4)]
9694#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9695pub fn _mm_getmant_round_sh<
9696 const NORM: _MM_MANTISSA_NORM_ENUM,
9697 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9698 const SAE: i32,
9699>(
9700 a: __m128h,
9701 b: __m128h,
9702) -> __m128h {
9703 static_assert_uimm_bits!(NORM, 4);
9704 static_assert_uimm_bits!(SIGN, 2);
9705 static_assert_sae!(SAE);
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
9707}
9708
/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
9710/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9711/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9712/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
/// the source sign.
9714///
9715/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9716///
9717/// _MM_MANT_NORM_1_2 // interval [1, 2)
9718/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9719/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9720/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9721///
9722/// The sign is determined by sc which can take the following values:
9723///
9724/// _MM_MANT_SIGN_src // sign = sign(src)
9725/// _MM_MANT_SIGN_zero // sign = 0
9726/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9727///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9729///
9730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
9731#[inline]
9732#[target_feature(enable = "avx512fp16")]
9733#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
9734#[rustc_legacy_const_generics(4, 5, 6)]
9735#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9736pub fn _mm_mask_getmant_round_sh<
9737 const NORM: _MM_MANTISSA_NORM_ENUM,
9738 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9739 const SAE: i32,
9740>(
9741 src: __m128h,
9742 k: __mmask8,
9743 a: __m128h,
9744 b: __m128h,
9745) -> __m128h {
9746 unsafe {
9747 static_assert_uimm_bits!(NORM, 4);
9748 static_assert_uimm_bits!(SIGN, 2);
9749 static_assert_sae!(SAE);
9750 vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE)
9751 }
9752}
9753
/// Normalize the mantissa of the lower half-precision (16-bit) floating-point element in b, store
9755/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9756/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9757/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
/// the source sign.
9759///
9760/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9761///
9762/// _MM_MANT_NORM_1_2 // interval [1, 2)
9763/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9764/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9765/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9766///
9767/// The sign is determined by sc which can take the following values:
9768///
9769/// _MM_MANT_SIGN_src // sign = sign(src)
9770/// _MM_MANT_SIGN_zero // sign = 0
9771/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9772///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9774///
9775/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
9776#[inline]
9777#[target_feature(enable = "avx512fp16")]
9778#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
9779#[rustc_legacy_const_generics(3, 4, 5)]
9780#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9781pub fn _mm_maskz_getmant_round_sh<
9782 const NORM: _MM_MANTISSA_NORM_ENUM,
9783 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9784 const SAE: i32,
9785>(
9786 k: __mmask8,
9787 a: __m128h,
9788 b: __m128h,
9789) -> __m128h {
9790 static_assert_uimm_bits!(NORM, 4);
9791 static_assert_uimm_bits!(SIGN, 2);
9792 static_assert_sae!(SAE);
    _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
9794}
9795
9796/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9797/// specified by imm8, and store the results in dst.
9798///
9799/// Rounding is done according to the imm8 parameter, which can be one of:
9800///
9801/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9802/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9803/// * [`_MM_FROUND_TO_POS_INF`] : round up
9804/// * [`_MM_FROUND_TO_ZERO`] : truncate
9805/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9806///
9807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
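///
/// A minimal sketch (illustrative; it assumes a nightly toolchain with the unstable `f16` type,
/// this feature enabled, and AVX512-FP16 hardware). The rounding mode is taken from the low bits
/// of `IMM8`, and the upper four bits of `IMM8` give the number of fraction bits to keep:
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// // Keep 0 fraction bits and round to nearest (even): 2.5 becomes 2.0 in every lane.
/// let r0 = _mm_roundscale_ph::<0>(_mm_set1_ph(2.5));
/// // Keep 1 fraction bit (multiples of 0.5) and truncate: 1.875 becomes 1.5 in every lane.
/// let r1 = _mm_roundscale_ph::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(_mm_set1_ph(1.875));
/// ```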
9808#[inline]
9809#[target_feature(enable = "avx512fp16,avx512vl")]
9810#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9811#[rustc_legacy_const_generics(1)]
9812#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9813pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
9814 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
9816}
9817
9818/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9819/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9820/// the corresponding mask bit is not set).
9821///
9822/// Rounding is done according to the imm8 parameter, which can be one of:
9823///
9824/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9825/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9826/// * [`_MM_FROUND_TO_POS_INF`] : round up
9827/// * [`_MM_FROUND_TO_ZERO`] : truncate
9828/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9829///
9830/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
9831#[inline]
9832#[target_feature(enable = "avx512fp16,avx512vl")]
9833#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9834#[rustc_legacy_const_generics(3)]
9835#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9836pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
9837 unsafe {
9838 static_assert_uimm_bits!(IMM8, 8);
9839 vrndscaleph_128(a, IMM8, src, k)
9840 }
9841}
9842
9843/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9844/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9845/// mask bit is not set).
9846///
9847/// Rounding is done according to the imm8 parameter, which can be one of:
9848///
9849/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9850/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9851/// * [`_MM_FROUND_TO_POS_INF`] : round up
9852/// * [`_MM_FROUND_TO_ZERO`] : truncate
9853/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9854///
9855/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
9856#[inline]
9857#[target_feature(enable = "avx512fp16,avx512vl")]
9858#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9859#[rustc_legacy_const_generics(2)]
9860#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9861pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
9862 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
9864}
9865
9866/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9867/// specified by imm8, and store the results in dst.
9868///
9869/// Rounding is done according to the imm8 parameter, which can be one of:
9870///
9871/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9872/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9873/// * [`_MM_FROUND_TO_POS_INF`] : round up
9874/// * [`_MM_FROUND_TO_ZERO`] : truncate
9875/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9876///
9877/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
9878#[inline]
9879#[target_feature(enable = "avx512fp16,avx512vl")]
9880#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9881#[rustc_legacy_const_generics(1)]
9882#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9883pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
9884 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
9886}
9887
9888/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9889/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9890/// the corresponding mask bit is not set).
9891///
9892/// Rounding is done according to the imm8 parameter, which can be one of:
9893///
9894/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9895/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9896/// * [`_MM_FROUND_TO_POS_INF`] : round up
9897/// * [`_MM_FROUND_TO_ZERO`] : truncate
9898/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9899///
9900/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
9901#[inline]
9902#[target_feature(enable = "avx512fp16,avx512vl")]
9903#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9904#[rustc_legacy_const_generics(3)]
9905#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9906pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
9907 src: __m256h,
9908 k: __mmask16,
9909 a: __m256h,
9910) -> __m256h {
9911 unsafe {
9912 static_assert_uimm_bits!(IMM8, 8);
9913 vrndscaleph_256(a, IMM8, src, k)
9914 }
9915}
9916
9917/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9918/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9919/// mask bit is not set).
9920///
9921/// Rounding is done according to the imm8 parameter, which can be one of:
9922///
9923/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9924/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9925/// * [`_MM_FROUND_TO_POS_INF`] : round up
9926/// * [`_MM_FROUND_TO_ZERO`] : truncate
9927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9928///
9929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
9930#[inline]
9931#[target_feature(enable = "avx512fp16,avx512vl")]
9932#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9933#[rustc_legacy_const_generics(2)]
9934#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9935pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
9936 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
9938}
9939
9940/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9941/// specified by imm8, and store the results in dst.
9942///
9943/// Rounding is done according to the imm8 parameter, which can be one of:
9944///
9945/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9946/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9947/// * [`_MM_FROUND_TO_POS_INF`] : round up
9948/// * [`_MM_FROUND_TO_ZERO`] : truncate
9949/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9950///
9951/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
9952#[inline]
9953#[target_feature(enable = "avx512fp16")]
9954#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9955#[rustc_legacy_const_generics(1)]
9956#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9957pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
9958 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
9960}
9961
9962/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9963/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9964/// the corresponding mask bit is not set).
9965///
9966/// Rounding is done according to the imm8 parameter, which can be one of:
9967///
9968/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9969/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9970/// * [`_MM_FROUND_TO_POS_INF`] : round up
9971/// * [`_MM_FROUND_TO_ZERO`] : truncate
9972/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9973///
9974/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
9975#[inline]
9976#[target_feature(enable = "avx512fp16")]
9977#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9978#[rustc_legacy_const_generics(3)]
9979#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9980pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
9981 src: __m512h,
9982 k: __mmask32,
9983 a: __m512h,
9984) -> __m512h {
9985 static_assert_uimm_bits!(IMM8, 8);
9986 _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
9987}
9988
9989/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9990/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9991/// mask bit is not set).
9992///
9993/// Rounding is done according to the imm8 parameter, which can be one of:
9994///
9995/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9996/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9997/// * [`_MM_FROUND_TO_POS_INF`] : round up
9998/// * [`_MM_FROUND_TO_ZERO`] : truncate
9999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10000///
10001/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
10002#[inline]
10003#[target_feature(enable = "avx512fp16")]
10004#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10005#[rustc_legacy_const_generics(2)]
10006#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10007pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10008 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
10010}
10011
10012/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10013/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
10015///
10016/// Rounding is done according to the imm8 parameter, which can be one of:
10017///
10018/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10019/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10020/// * [`_MM_FROUND_TO_POS_INF`] : round up
10021/// * [`_MM_FROUND_TO_ZERO`] : truncate
10022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10023///
10024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
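///
/// A minimal sketch (illustrative; nightly toolchain with the unstable `f16` type, this feature
/// enabled, and AVX512-FP16 hardware assumed). `SAE = _MM_FROUND_NO_EXC` suppresses exceptions:
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// let a = _mm512_set1_ph(2.5);
/// // Round every lane to the nearest (even) integer, yielding 2.0, without raising exceptions.
/// let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
/// ```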
10025#[inline]
10026#[target_feature(enable = "avx512fp16")]
10027#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10028#[rustc_legacy_const_generics(1, 2)]
10029#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10030pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10031 static_assert_uimm_bits!(IMM8, 8);
10032 static_assert_sae!(SAE);
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_undefined_ph(), 0xffffffff, a)
10034}
10035
10036/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10037/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10038/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// in the sae parameter.
10040///
10041/// Rounding is done according to the imm8 parameter, which can be one of:
10042///
10043/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10044/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10045/// * [`_MM_FROUND_TO_POS_INF`] : round up
10046/// * [`_MM_FROUND_TO_ZERO`] : truncate
10047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10048///
10049/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
10050#[inline]
10051#[target_feature(enable = "avx512fp16")]
10052#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10053#[rustc_legacy_const_generics(3, 4)]
10054#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10055pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10056 src: __m512h,
10057 k: __mmask32,
10058 a: __m512h,
10059) -> __m512h {
10060 unsafe {
10061 static_assert_uimm_bits!(IMM8, 8);
10062 static_assert_sae!(SAE);
10063 vrndscaleph_512(a, IMM8, src, k, SAE)
10064 }
10065}
10066
10067/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10068/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10070///
10071/// Rounding is done according to the imm8 parameter, which can be one of:
10072///
10073/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10074/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10075/// * [`_MM_FROUND_TO_POS_INF`] : round up
10076/// * [`_MM_FROUND_TO_ZERO`] : truncate
10077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10078///
10079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
10080#[inline]
10081#[target_feature(enable = "avx512fp16")]
10082#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10083#[rustc_legacy_const_generics(2, 3)]
10084#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10085pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10086 k: __mmask32,
10087 a: __m512h,
10088) -> __m512h {
10089 static_assert_uimm_bits!(IMM8, 8);
10090 static_assert_sae!(SAE);
    _mm512_mask_roundscale_round_ph::<IMM8, SAE>(_mm512_setzero_ph(), k, a)
10092}
10093
10094/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10095/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10096/// from a to the upper elements of dst.
10097///
10098/// Rounding is done according to the imm8 parameter, which can be one of:
10099///
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
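///
/// A minimal sketch (illustrative; nightly toolchain with the unstable `f16` type, this feature
/// enabled, and AVX512-FP16 hardware assumed):
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// let a = _mm_set1_ph(7.0);
/// let b = _mm_set_sh(2.5);
/// // Lower lane: 2.5 rounded to the nearest (even) integer = 2.0; the upper lanes come from `a`.
/// let r = _mm_roundscale_sh::<0>(a, b);
/// ```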
10107#[inline]
10108#[target_feature(enable = "avx512fp16")]
10109#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10110#[rustc_legacy_const_generics(2)]
10111#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10112pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10113 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10115}
10116
10117/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10118/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10119/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10120///
10121/// Rounding is done according to the imm8 parameter, which can be one of:
10122///
10123/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10124/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10125/// * [`_MM_FROUND_TO_POS_INF`] : round up
10126/// * [`_MM_FROUND_TO_ZERO`] : truncate
10127/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10128///
10129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
10130#[inline]
10131#[target_feature(enable = "avx512fp16")]
10132#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10133#[rustc_legacy_const_generics(4)]
10134#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10135pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
10136 src: __m128h,
10137 k: __mmask8,
10138 a: __m128h,
10139 b: __m128h,
10140) -> __m128h {
10141 static_assert_uimm_bits!(IMM8, 8);
10142 _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10143}
10144
10145/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10146/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10147/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10148///
10149/// Rounding is done according to the imm8 parameter, which can be one of:
10150///
10151/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10152/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10153/// * [`_MM_FROUND_TO_POS_INF`] : round up
10154/// * [`_MM_FROUND_TO_ZERO`] : truncate
10155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10156///
10157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
10158#[inline]
10159#[target_feature(enable = "avx512fp16")]
10160#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10161#[rustc_legacy_const_generics(3)]
10162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10163pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10164 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
10166}
10167
10168/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10169/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10170/// from a to the upper elements of dst.
10171///
10172/// Rounding is done according to the imm8 parameter, which can be one of:
10173///
10174/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10175/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10176/// * [`_MM_FROUND_TO_POS_INF`] : round up
10177/// * [`_MM_FROUND_TO_ZERO`] : truncate
10178/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10179///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10181///
10182/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
10183#[inline]
10184#[target_feature(enable = "avx512fp16")]
10185#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10186#[rustc_legacy_const_generics(2, 3)]
10187#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10188pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
10189 static_assert_uimm_bits!(IMM8, 8);
10190 static_assert_sae!(SAE);
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10192}
10193
10194/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10195/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10196/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10197///
10198/// Rounding is done according to the imm8 parameter, which can be one of:
10199///
10200/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10201/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10202/// * [`_MM_FROUND_TO_POS_INF`] : round up
10203/// * [`_MM_FROUND_TO_ZERO`] : truncate
10204/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10205///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10207///
10208/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
10209#[inline]
10210#[target_feature(enable = "avx512fp16")]
10211#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10212#[rustc_legacy_const_generics(4, 5)]
10213#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10214pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10215 src: __m128h,
10216 k: __mmask8,
10217 a: __m128h,
10218 b: __m128h,
10219) -> __m128h {
10220 unsafe {
10221 static_assert_uimm_bits!(IMM8, 8);
10222 static_assert_sae!(SAE);
10223 vrndscalesh(a, b, src, k, IMM8, SAE)
10224 }
10225}
10226
10227/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10228/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10229/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10230///
10231/// Rounding is done according to the imm8 parameter, which can be one of:
10232///
10233/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10234/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10235/// * [`_MM_FROUND_TO_POS_INF`] : round up
10236/// * [`_MM_FROUND_TO_ZERO`] : truncate
10237/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10238///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10240///
10241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
10242#[inline]
10243#[target_feature(enable = "avx512fp16")]
10244#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10245#[rustc_legacy_const_generics(3, 4)]
10246#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10247pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10248 k: __mmask8,
10249 a: __m128h,
10250 b: __m128h,
10251) -> __m128h {
10252 static_assert_uimm_bits!(IMM8, 8);
10253 static_assert_sae!(SAE);
    _mm_mask_roundscale_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
10255}
10256
10257/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10258/// the results in dst.
10259///
10260/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
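///
/// Each lane is computed as `a * 2^floor(b)`. A minimal sketch (illustrative; nightly toolchain
/// with the unstable `f16` type, this feature enabled, and AVX512-FP16 hardware assumed):
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// let a = _mm_set1_ph(3.0);
/// let b = _mm_set1_ph(2.0);
/// // Every lane becomes 3.0 * 2^2 = 12.0.
/// let r = _mm_scalef_ph(a, b);
/// ```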
10261#[inline]
10262#[target_feature(enable = "avx512fp16,avx512vl")]
10263#[cfg_attr(test, assert_instr(vscalefph))]
10264#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10265pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
10267}
10268
10269/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10270/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10271///
10272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
10273#[inline]
10274#[target_feature(enable = "avx512fp16,avx512vl")]
10275#[cfg_attr(test, assert_instr(vscalefph))]
10276#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10277pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10278 unsafe { vscalefph_128(a, b, src, k) }
10279}
10280
10281/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10282/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10283///
10284/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
10285#[inline]
10286#[target_feature(enable = "avx512fp16,avx512vl")]
10287#[cfg_attr(test, assert_instr(vscalefph))]
10288#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10289pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
10291}
10292
10293/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10294/// the results in dst.
10295///
10296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
10297#[inline]
10298#[target_feature(enable = "avx512fp16,avx512vl")]
10299#[cfg_attr(test, assert_instr(vscalefph))]
10300#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10301pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
10303}
10304
10305/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10306/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10307///
10308/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
10309#[inline]
10310#[target_feature(enable = "avx512fp16,avx512vl")]
10311#[cfg_attr(test, assert_instr(vscalefph))]
10312#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10313pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
10314 unsafe { vscalefph_256(a, b, src, k) }
10315}
10316
10317/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10318/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10319///
10320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
10321#[inline]
10322#[target_feature(enable = "avx512fp16,avx512vl")]
10323#[cfg_attr(test, assert_instr(vscalefph))]
10324#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10325pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
10327}
10328
10329/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10330/// the results in dst.
10331///
10332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
10333#[inline]
10334#[target_feature(enable = "avx512fp16")]
10335#[cfg_attr(test, assert_instr(vscalefph))]
10336#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10337pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
10339}
10340
10341/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10342/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10343///
10344/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
10345#[inline]
10346#[target_feature(enable = "avx512fp16")]
10347#[cfg_attr(test, assert_instr(vscalefph))]
10348#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10349pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
10350 _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10351}
10352
10353/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10354/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10355///
10356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
10357#[inline]
10358#[target_feature(enable = "avx512fp16")]
10359#[cfg_attr(test, assert_instr(vscalefph))]
10360#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10361pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
10363}
10364
10365/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10366/// the results in dst.
10367///
10368/// Rounding is done according to the rounding parameter, which can be one of:
10369///
10370/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10371/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10372/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10373/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10374/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10375///
10376/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
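///
/// A minimal sketch (illustrative; nightly toolchain with the unstable `f16` type, this feature
/// enabled, and AVX512-FP16 hardware assumed):
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// let a = _mm512_set1_ph(3.0);
/// let b = _mm512_set1_ph(2.0);
/// // Every lane becomes 3.0 * 2^2 = 12.0; the result is exact, so the rounding mode does not matter here.
/// let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```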
10377#[inline]
10378#[target_feature(enable = "avx512fp16")]
10379#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10380#[rustc_legacy_const_generics(2)]
10381#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10382pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
10383 static_assert_rounding!(ROUNDING);
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
10385}
10386
10387/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10388/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10389///
10390/// Rounding is done according to the rounding parameter, which can be one of:
10391///
10392/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10393/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10394/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10395/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10396/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10397///
10398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
10399#[inline]
10400#[target_feature(enable = "avx512fp16")]
10401#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10402#[rustc_legacy_const_generics(4)]
10403#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10404pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
10405 src: __m512h,
10406 k: __mmask32,
10407 a: __m512h,
10408 b: __m512h,
10409) -> __m512h {
10410 unsafe {
10411 static_assert_rounding!(ROUNDING);
10412 vscalefph_512(a, b, src, k, ROUNDING)
10413 }
10414}
10415
10416/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10417/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10418///
10419/// Rounding is done according to the rounding parameter, which can be one of:
10420///
10421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10426///
10427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
10428#[inline]
10429#[target_feature(enable = "avx512fp16")]
10430#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10431#[rustc_legacy_const_generics(3)]
10432#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10433pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
10434 k: __mmask32,
10435 a: __m512h,
10436 b: __m512h,
10437) -> __m512h {
10438 static_assert_rounding!(ROUNDING);
    _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_setzero_ph(), k, a, b)
10440}
10441
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10443/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10444/// elements of dst.
10445///
10446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
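///
/// A minimal sketch (illustrative; nightly toolchain with the unstable `f16` type, this feature
/// enabled, and AVX512-FP16 hardware assumed):
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// let a = _mm_set_sh(3.0);
/// let b = _mm_set_sh(2.0);
/// // Lower lane: 3.0 * 2^2 = 12.0; the upper lanes are copied from `a`.
/// let r = _mm_scalef_sh(a, b);
/// ```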
10447#[inline]
10448#[target_feature(enable = "avx512fp16")]
10449#[cfg_attr(test, assert_instr(vscalefsh))]
10450#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10451pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
10453}
10454
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10456/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10457/// and copy the upper 7 packed elements from a to the upper elements of dst.
10458///
10459/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
10460#[inline]
10461#[target_feature(enable = "avx512fp16")]
10462#[cfg_attr(test, assert_instr(vscalefsh))]
10463#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10464pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10465 _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10466}
10467
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10469/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10470/// and copy the upper 7 packed elements from a to the upper elements of dst.
10471///
10472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
10473#[inline]
10474#[target_feature(enable = "avx512fp16")]
10475#[cfg_attr(test, assert_instr(vscalefsh))]
10476#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10477pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), k, a, b)
10479}
10480
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10482/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10483/// elements of dst.
10484///
10485/// Rounding is done according to the rounding parameter, which can be one of:
10486///
10487/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10488/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10489/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10490/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10491/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10492///
10493/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
10494#[inline]
10495#[target_feature(enable = "avx512fp16")]
10496#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10497#[rustc_legacy_const_generics(2)]
10498#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10499pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
10500 static_assert_rounding!(ROUNDING);
    _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
10502}
10503
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10505/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10506/// and copy the upper 7 packed elements from a to the upper elements of dst.
10507///
10508/// Rounding is done according to the rounding parameter, which can be one of:
10509///
10510/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10511/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10512/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10513/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10514/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10515///
10516/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
10517#[inline]
10518#[target_feature(enable = "avx512fp16")]
10519#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10520#[rustc_legacy_const_generics(4)]
10521#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10522pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
10523 src: __m128h,
10524 k: __mmask8,
10525 a: __m128h,
10526 b: __m128h,
10527) -> __m128h {
10528 unsafe {
10529 static_assert_rounding!(ROUNDING);
10530 vscalefsh(a, b, src, k, ROUNDING)
10531 }
10532}
10533
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, store
10535/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10536/// and copy the upper 7 packed elements from a to the upper elements of dst.
10537///
10538/// Rounding is done according to the rounding parameter, which can be one of:
10539///
10540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10545///
10546/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
10547#[inline]
10548#[target_feature(enable = "avx512fp16")]
10549#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10550#[rustc_legacy_const_generics(3)]
10551#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10552pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
10553 k: __mmask8,
10554 a: __m128h,
10555 b: __m128h,
10556) -> __m128h {
10557 static_assert_rounding!(ROUNDING);
    _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
10559}
10560
10561/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10562/// number of bits specified by imm8, and store the results in dst.
10563///
10564/// Rounding is done according to the imm8 parameter, which can be one of:
10565///
10566/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10567/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10568/// * [`_MM_FROUND_TO_POS_INF`] : round up
10569/// * [`_MM_FROUND_TO_ZERO`] : truncate
10570/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10571///
10572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
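///
/// The result is the signed remainder after rounding `a` to the requested number of fraction
/// bits. A minimal sketch (illustrative; nightly toolchain with the unstable `f16` type, this
/// feature enabled, and AVX512-FP16 hardware assumed):
///
/// ```ignore
/// # #![feature(stdarch_x86_avx512_f16, f16)]
/// # use std::arch::x86_64::*;
/// let a = _mm_set1_ph(1.25);
/// // Keep 0 fraction bits, round to nearest: 1.25 - 1.0 = 0.25 in every lane.
/// let r = _mm_reduce_ph::<0>(a);
/// ```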
10573#[inline]
10574#[target_feature(enable = "avx512fp16,avx512vl")]
10575#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10576#[rustc_legacy_const_generics(1)]
10577#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10578pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
10579 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
10581}
10582
10583/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10584/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10585/// from src when the corresponding mask bit is not set).
10586///
10587/// Rounding is done according to the imm8 parameter, which can be one of:
10588///
10589/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10590/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10591/// * [`_MM_FROUND_TO_POS_INF`] : round up
10592/// * [`_MM_FROUND_TO_ZERO`] : truncate
10593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10594///
10595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
10596#[inline]
10597#[target_feature(enable = "avx512fp16,avx512vl")]
10598#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10599#[rustc_legacy_const_generics(3)]
10600#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10601pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
10602 unsafe {
10603 static_assert_uimm_bits!(IMM8, 8);
10604 vreduceph_128(a, IMM8, src, k)
10605 }
10606}
10607
10608/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10609/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10610/// out when the corresponding mask bit is not set).
10611///
10612/// Rounding is done according to the imm8 parameter, which can be one of:
10613///
10614/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10615/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10616/// * [`_MM_FROUND_TO_POS_INF`] : round up
10617/// * [`_MM_FROUND_TO_ZERO`] : truncate
10618/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10619///
10620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
10621#[inline]
10622#[target_feature(enable = "avx512fp16,avx512vl")]
10623#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10624#[rustc_legacy_const_generics(2)]
10625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10626pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
10627 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
10629}
10630
10631/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10632/// number of bits specified by imm8, and store the results in dst.
10633///
10634/// Rounding is done according to the imm8 parameter, which can be one of:
10635///
10636/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10637/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10638/// * [`_MM_FROUND_TO_POS_INF`] : round up
10639/// * [`_MM_FROUND_TO_ZERO`] : truncate
10640/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10641///
10642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
10643#[inline]
10644#[target_feature(enable = "avx512fp16,avx512vl")]
10645#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10646#[rustc_legacy_const_generics(1)]
10647#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10648pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
10649 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
10651}
10652
10653/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10654/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10655/// from src when the corresponding mask bit is not set).
10656///
10657/// Rounding is done according to the imm8 parameter, which can be one of:
10658///
10659/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10660/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10661/// * [`_MM_FROUND_TO_POS_INF`] : round up
10662/// * [`_MM_FROUND_TO_ZERO`] : truncate
10663/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10664///
10665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
10666#[inline]
10667#[target_feature(enable = "avx512fp16,avx512vl")]
10668#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10669#[rustc_legacy_const_generics(3)]
10670#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10671pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
10672 unsafe {
10673 static_assert_uimm_bits!(IMM8, 8);
10674 vreduceph_256(a, IMM8, src, k)
10675 }
10676}
10677
10678/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10679/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10680/// out when the corresponding mask bit is not set).
10681///
10682/// Rounding is done according to the imm8 parameter, which can be one of:
10683///
10684/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10685/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10686/// * [`_MM_FROUND_TO_POS_INF`] : round up
10687/// * [`_MM_FROUND_TO_ZERO`] : truncate
10688/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10689///
10690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
10691#[inline]
10692#[target_feature(enable = "avx512fp16,avx512vl")]
10693#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10694#[rustc_legacy_const_generics(2)]
10695#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10696pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10697 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
10699}
10700
10701/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10702/// number of bits specified by imm8, and store the results in dst.
10703///
10704/// Rounding is done according to the imm8 parameter, which can be one of:
10705///
10706/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10707/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10708/// * [`_MM_FROUND_TO_POS_INF`] : round up
10709/// * [`_MM_FROUND_TO_ZERO`] : truncate
10710/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10711///
10712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
10713#[inline]
10714#[target_feature(enable = "avx512fp16")]
10715#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10716#[rustc_legacy_const_generics(1)]
10717#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10718pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
10719 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
10721}
10722
10723/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10724/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10725/// from src when the corresponding mask bit is not set).
10726///
10727/// Rounding is done according to the imm8 parameter, which can be one of:
10728///
10729/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10730/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10731/// * [`_MM_FROUND_TO_POS_INF`] : round up
10732/// * [`_MM_FROUND_TO_ZERO`] : truncate
10733/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10734///
10735/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
10736#[inline]
10737#[target_feature(enable = "avx512fp16")]
10738#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10739#[rustc_legacy_const_generics(3)]
10740#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10741pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
10742 static_assert_uimm_bits!(IMM8, 8);
10743 _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
10744}
10745
10746/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10747/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10748/// out when the corresponding mask bit is not set).
10749///
10750/// Rounding is done according to the imm8 parameter, which can be one of:
10751///
10752/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10753/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10754/// * [`_MM_FROUND_TO_POS_INF`] : round up
10755/// * [`_MM_FROUND_TO_ZERO`] : truncate
10756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10757///
10758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
10759#[inline]
10760#[target_feature(enable = "avx512fp16")]
10761#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10762#[rustc_legacy_const_generics(2)]
10763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10764pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10765 static_assert_uimm_bits!(IMM8, 8);
10766 _mm512_mask_reduce_ph::<IMM8>(src:_mm512_setzero_ph(), k, a)
10767}
10768
10769/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10770/// number of bits specified by imm8, and store the results in dst.
10771///
10772/// Rounding is done according to the imm8 parameter, which can be one of:
10773///
10774/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10775/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10776/// * [`_MM_FROUND_TO_POS_INF`] : round up
10777/// * [`_MM_FROUND_TO_ZERO`] : truncate
10778/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10779///
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
10783#[inline]
10784#[target_feature(enable = "avx512fp16")]
10785#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10786#[rustc_legacy_const_generics(1, 2)]
10787#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10788pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10789 static_assert_uimm_bits!(IMM8, 8);
10790 static_assert_sae!(SAE);
10791 _mm512_mask_reduce_round_ph::<IMM8, SAE>(src:_mm512_undefined_ph(), k:0xffffffff, a)
10792}
10793
10794/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10795/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10796/// from src when the corresponding mask bit is not set).
10797///
10798/// Rounding is done according to the imm8 parameter, which can be one of:
10799///
10800/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10801/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10802/// * [`_MM_FROUND_TO_POS_INF`] : round up
10803/// * [`_MM_FROUND_TO_ZERO`] : truncate
10804/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10805///
10806/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10807///
10808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
10809#[inline]
10810#[target_feature(enable = "avx512fp16")]
10811#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10812#[rustc_legacy_const_generics(3, 4)]
10813#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10814pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10815 src: __m512h,
10816 k: __mmask32,
10817 a: __m512h,
10818) -> __m512h {
10819 unsafe {
10820 static_assert_uimm_bits!(IMM8, 8);
10821 static_assert_sae!(SAE);
10822 vreduceph_512(a, IMM8, src, k, SAE)
10823 }
10824}
10825
10826/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10827/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10828/// out when the corresponding mask bit is not set).
10829///
10830/// Rounding is done according to the imm8 parameter, which can be one of:
10831///
10832/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10833/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10834/// * [`_MM_FROUND_TO_POS_INF`] : round up
10835/// * [`_MM_FROUND_TO_ZERO`] : truncate
10836/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10837///
10838/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10839///
10840/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
10841#[inline]
10842#[target_feature(enable = "avx512fp16")]
10843#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10844#[rustc_legacy_const_generics(2, 3)]
10845#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10846pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10847 k: __mmask32,
10848 a: __m512h,
10849) -> __m512h {
10850 static_assert_uimm_bits!(IMM8, 8);
10851 static_assert_sae!(SAE);
10852 _mm512_mask_reduce_round_ph::<IMM8, SAE>(src:_mm512_setzero_ph(), k, a)
10853}
10854
10855/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10856/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
10857/// upper 7 packed elements from a to the upper elements of dst.
10858///
10859/// Rounding is done according to the imm8 parameter, which can be one of:
10860///
10861/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10862/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10863/// * [`_MM_FROUND_TO_POS_INF`] : round up
10864/// * [`_MM_FROUND_TO_ZERO`] : truncate
10865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10866///
10867/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
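///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` target feature:
///
/// ```ignore
/// let a = _mm_set1_ph(9.0);
/// let b = _mm_set_sh(2.75);
/// // IMM8 = 0 keeps no fraction bits: lane 0 becomes 2.75 - round(2.75) = -0.25,
/// // while lanes 1..=7 are copied from `a` and stay 9.0.
/// let r = _mm_reduce_sh::<{ _MM_FROUND_TO_NEAREST_INT }>(a, b);
/// ```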
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
}

/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
/// a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_reduce_sh<const IMM8: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}

/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
/// to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
/// 7 packed elements from a to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
}

/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
/// to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        vreducesh(a, b, src, k, IMM8, SAE)
    }
}

/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
/// to the upper elements of dst.
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_reduce_round_sh::<IMM8, SAE>(f16x8::ZERO.as_m128h(), k, a, b)
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
/// sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
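///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// // Horizontal sum of all eight f16 lanes.
/// let v = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let sum = _mm_reduce_add_ph(v); // 36.0
/// ```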
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
    unsafe {
        let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
        let a: __m128h = _mm_add_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a: __m128h = _mm_add_ph(a, b);
        simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1)
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
/// sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
    unsafe {
        let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_add_ph(_mm_add_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
/// sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
    unsafe {
        let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let q: __m256h = simd_shuffle!(
            a,
            a,
            [
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
            ]
        );
        _mm256_reduce_add_ph(_mm256_add_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
/// the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
    unsafe {
        let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
        let a: __m128h = _mm_mul_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a: __m128h = _mm_mul_ph(a, b);
        simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1)
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
/// the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
    unsafe {
        let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_mul_ph(_mm_mul_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
/// the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
    unsafe {
        let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let q: __m256h = simd_shuffle!(
            a,
            a,
            [
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
            ]
        );
        _mm256_reduce_mul_ph(_mm256_mul_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
/// minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
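///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// let v = _mm_set_ph(4.0, -1.5, 9.0, 0.5, 7.0, 3.0, 8.0, 2.0);
/// let m = _mm_reduce_min_ph(v); // -1.5
/// ```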
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_min_ph(a: __m128h) -> f16 {
    unsafe {
        let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
        let a: __m128h = _mm_min_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a: __m128h = _mm_min_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
        simd_extract!(_mm_min_sh(a, b), 0)
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
/// minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
    unsafe {
        let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_min_ph(_mm_min_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
/// minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
    unsafe {
        let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let q: __m256h = simd_shuffle!(
            a,
            a,
            [
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
            ]
        );
        _mm256_reduce_min_ph(_mm256_min_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
/// maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_max_ph(a: __m128h) -> f16 {
    unsafe {
        let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
        let a: __m128h = _mm_max_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a: __m128h = _mm_max_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
        simd_extract!(_mm_max_sh(a, b), 0)
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
/// maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
    unsafe {
        let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
        _mm_reduce_max_ph(_mm_max_ph(p, q))
    }
}

/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
/// maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
    unsafe {
        let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
        let q: __m256h = simd_shuffle!(
            a,
            a,
            [
                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
            ]
        );
        _mm256_reduce_max_ph(_mm256_max_ph(p, q))
    }
}

macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
    ($mask_type: ty, $reg: ident, $a: expr) => {{
        let dst: $mask_type;
        asm!(
            "vfpclassph {k}, {src}, {imm8}",
            k = lateout(kreg) dst,
            src = in($reg) $a,
            imm8 = const IMM8,
            options(pure, nomem, nostack)
        );
        dst
    }};
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
        let dst: $mask_type;
        asm!(
            "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            src = in($reg) $a,
            imm8 = const IMM8,
            options(pure, nomem, nostack)
        );
        dst
    }};
}

/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
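///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` and `avx512vl` target features and
/// that the unstable `f16` associated constants (`f16::NAN`, `f16::INFINITY`) are available:
///
/// ```ignore
/// // 0x01 | 0x80 selects QNaN and SNaN, i.e. "is this lane any kind of NaN?".
/// let v = _mm_set_ph(1.0, f16::NAN, 0.0, -0.0, 2.5, f16::INFINITY, -1.0, 3.0);
/// let k = _mm_fpclass_ph_mask::<0x81>(v);
/// // bit i of `k` is set exactly where lane i of `v` is NaN
/// ```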
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        fpclass_asm!(__mmask8, xmm_reg, a)
    }
}

/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        fpclass_asm!(__mmask8, k1, xmm_reg, a)
    }
}

/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        fpclass_asm!(__mmask16, ymm_reg, a)
    }
}

/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        fpclass_asm!(__mmask16, k1, ymm_reg, a)
    }
}

/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        fpclass_asm!(__mmask32, zmm_reg, a)
    }
}

/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        fpclass_asm!(__mmask32, k1, zmm_reg, a)
    }
}

/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k.
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
    _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a)
}

/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
/// 0x01 // QNaN
/// 0x02 // Positive Zero
/// 0x04 // Negative Zero
/// 0x08 // Positive Infinity
/// 0x10 // Negative Infinity
/// 0x20 // Denormal
/// 0x40 // Negative
/// 0x80 // SNaN
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        vfpclasssh(a, IMM8, k1)
    }
}

/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
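///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// // Lane i comes from `b` when bit i of the mask is set, otherwise from `a`.
/// let r = _mm_mask_blend_ph(0b0000_1111, a, b);
/// // lanes 0..=3 are 2.0, lanes 4..=7 are 1.0
/// ```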
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    unsafe { simd_select_bitmask(k, b, a) }
}

/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    unsafe { simd_select_bitmask(k, b, a) }
}

/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    unsafe { simd_select_bitmask(k, b, a) }
}

/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
/// and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
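///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// // Index values 0..=7 select lanes of `a`, 8..=15 select lanes of `b`.
/// let a = _mm_set1_ph(1.0);
/// let b = _mm_set1_ph(2.0);
/// let idx = _mm_set_epi16(15, 6, 13, 4, 11, 2, 9, 0);
/// let r = _mm_permutex2var_ph(a, idx, b);
/// // `r` alternates between lanes taken from `a` (1.0) and from `b` (2.0)
/// ```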
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
    _mm_castsi128_ph(_mm_permutex2var_epi16(
        _mm_castph_si128(a),
        idx,
        _mm_castph_si128(b),
    ))
}

/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
/// and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
    _mm256_castsi256_ph(_mm256_permutex2var_epi16(
        _mm256_castph_si256(a),
        idx,
        _mm256_castph_si256(b),
    ))
}

/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
/// and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
    _mm512_castsi512_ph(_mm512_permutex2var_epi16(
        _mm512_castph_si512(a),
        idx,
        _mm512_castph_si512(b),
    ))
}

/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
    _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a)))
}

/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
    _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a)))
}

/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
    _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
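///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` and `avx512vl` target features:
///
/// ```ignore
/// let ints = _mm_set_epi16(7, -6, 5, -4, 3, -2, 1, 0);
/// let halves = _mm_cvtepi16_ph(ints);
/// // lanes of `halves` (lane 0 first): 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0
/// ```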
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
    unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) }
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a)
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
    unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) }
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
    _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a)
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
    unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) }
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtw2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
    _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a)
}

/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
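///
/// The snippet below is an illustrative sketch only (not compiled here); it assumes the
/// calling function already enables the `avx512fp16` target feature:
///
/// ```ignore
/// // The rounding mode only matters once a value is not exactly representable as f16;
/// // integers above 2048 fall on a grid with spacing 2, so 2049 has to round.
/// let a = _mm512_set1_epi16(2049);
/// let down = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); // 2048.0
/// let up = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); // 2050.0
/// ```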
11705#[inline]
11706#[target_feature(enable = "avx512fp16")]
11707#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11708#[rustc_legacy_const_generics(1)]
11709#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11710pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11711 unsafe {
11712 static_assert_rounding!(ROUNDING);
11713 vcvtw2ph_512(a.as_i16x32(), ROUNDING)
11714 }
11715}
11716
11717/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11718/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11719/// mask bit is not set).
11720///
11721/// Rounding is done according to the rounding parameter, which can be one of:
11722///
11723/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11724/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11725/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11726/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11727/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11728///
11729/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
11730#[inline]
11731#[target_feature(enable = "avx512fp16")]
11732#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11733#[rustc_legacy_const_generics(3)]
11734#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11735pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
11736 src: __m512h,
11737 k: __mmask32,
11738 a: __m512i,
11739) -> __m512h {
11740 unsafe {
11741 static_assert_rounding!(ROUNDING);
11742 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi16_ph::<ROUNDING>(a), no:src)
11743 }
11744}
11745
11746/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11747/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11748///
11749/// Rounding is done according to the rounding parameter, which can be one of:
11750///
11751/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11752/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11753/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11754/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11755/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11756///
11757/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
11758#[inline]
11759#[target_feature(enable = "avx512fp16")]
11760#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11761#[rustc_legacy_const_generics(2)]
11762#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11763pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11764 static_assert_rounding!(ROUNDING);
11765 _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a)
11766}
11767
11768/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11769/// and store the results in dst.
11770///
11771/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
11772#[inline]
11773#[target_feature(enable = "avx512fp16,avx512vl")]
11774#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11775#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11776pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
11777 unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) }
11778}
11779
11780/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11781/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11782/// mask bit is not set).
11783///
11784/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
11785#[inline]
11786#[target_feature(enable = "avx512fp16,avx512vl")]
11787#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11788#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11789pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11790 unsafe { simd_select_bitmask(m:k, yes:_mm_cvtepu16_ph(a), no:src) }
11791}
11792
11793/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11794/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11795///
11796/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
11797#[inline]
11798#[target_feature(enable = "avx512fp16,avx512vl")]
11799#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11800#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11801pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
11802 _mm_mask_cvtepu16_ph(src:_mm_setzero_ph(), k, a)
11803}
11804
11805/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11806/// and store the results in dst.
11807///
11808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
11809#[inline]
11810#[target_feature(enable = "avx512fp16,avx512vl")]
11811#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11812#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11813pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
11814 unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) }
11815}
11816
11817/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11818/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11819/// mask bit is not set).
11820///
11821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
11822#[inline]
11823#[target_feature(enable = "avx512fp16,avx512vl")]
11824#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11825#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11826pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
11827 unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepu16_ph(a), no:src) }
11828}
11829
11830/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11831/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11832///
11833/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
11834#[inline]
11835#[target_feature(enable = "avx512fp16,avx512vl")]
11836#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11837#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11838pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
11839 _mm256_mask_cvtepu16_ph(src:_mm256_setzero_ph(), k, a)
11840}
11841
11842/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11843/// and store the results in dst.
11844///
11845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
11846#[inline]
11847#[target_feature(enable = "avx512fp16")]
11848#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11849#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11850pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
11851 unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) }
11852}
11853
11854/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11855/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11856/// mask bit is not set).
11857///
11858/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
11859#[inline]
11860#[target_feature(enable = "avx512fp16")]
11861#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11862#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11863pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
11864 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu16_ph(a), no:src) }
11865}
11866
11867/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11868/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11869///
11870/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
11871#[inline]
11872#[target_feature(enable = "avx512fp16")]
11873#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11874#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11875pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
11876 _mm512_mask_cvtepu16_ph(src:_mm512_setzero_ph(), k, a)
11877}
11878
11879/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11880/// and store the results in dst.
11881///
11882/// Rounding is done according to the rounding parameter, which can be one of:
11883///
11884/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11885/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11886/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11887/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11888/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11889///
11890/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
11891#[inline]
11892#[target_feature(enable = "avx512fp16")]
11893#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11894#[rustc_legacy_const_generics(1)]
11895#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11896pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11897 unsafe {
11898 static_assert_rounding!(ROUNDING);
11899 vcvtuw2ph_512(a.as_u16x32(), ROUNDING)
11900 }
11901}
11902
11903/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11904/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11905/// mask bit is not set).
11906///
11907/// Rounding is done according to the rounding parameter, which can be one of:
11908///
11909/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11910/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11911/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11912/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11913/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11914///
11915/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
11916#[inline]
11917#[target_feature(enable = "avx512fp16")]
11918#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11919#[rustc_legacy_const_generics(3)]
11920#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11921pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
11922 src: __m512h,
11923 k: __mmask32,
11924 a: __m512i,
11925) -> __m512h {
11926 unsafe {
11927 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::<ROUNDING>(a), src)
11929 }
11930}
11931
11932/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11933/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11934///
11935/// Rounding is done according to the rounding parameter, which can be one of:
11936///
11937/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11938/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11939/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11940/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11942///
11943/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
11944#[inline]
11945#[target_feature(enable = "avx512fp16")]
11946#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11947#[rustc_legacy_const_generics(2)]
11948#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11949pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11950 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
11952}
11953
11954/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11955/// and store the results in dst. The upper 64 bits of dst are zeroed out.
11956///
11957/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
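///
/// A minimal sketch of the lane layout (assumes nightly `stdarch_x86_avx512_f16` and AVX512-FP16
/// plus AVX512-VL hardware, so it is not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Four i32 lanes become four f16 lanes in the low 64 bits; the high 64 bits are zero.
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// let r = _mm_cvtepi32_ph(a);
/// ```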
11958#[inline]
11959#[target_feature(enable = "avx512fp16,avx512vl")]
11960#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11961#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11962pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
11964}
11965
11966/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11967/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11968/// mask bit is not set). The upper 64 bits of dst are zeroed out.
11969///
11970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
11971#[inline]
11972#[target_feature(enable = "avx512fp16,avx512vl")]
11973#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11974#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11975pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11976 unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) }
11977}
11978
11979/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11980/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11981/// The upper 64 bits of dst are zeroed out.
11982///
11983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
11984#[inline]
11985#[target_feature(enable = "avx512fp16,avx512vl")]
11986#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11987#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11988pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
11990}
11991
11992/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
11993/// and store the results in dst.
11994///
11995/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
11996#[inline]
11997#[target_feature(enable = "avx512fp16,avx512vl")]
11998#[cfg_attr(test, assert_instr(vcvtdq2ph))]
11999#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12000pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
12001 unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) }
12002}
12003
12004/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12005/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12006/// mask bit is not set).
12007///
12008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
12009#[inline]
12010#[target_feature(enable = "avx512fp16,avx512vl")]
12011#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12012#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12013pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) }
12015}
12016
12017/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12018/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
12021#[inline]
12022#[target_feature(enable = "avx512fp16,avx512vl")]
12023#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12024#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12025pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
12027}
12028
12029/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12030/// and store the results in dst.
12031///
12032/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
12033#[inline]
12034#[target_feature(enable = "avx512fp16")]
12035#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12036#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12037pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
12038 unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) }
12039}
12040
12041/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12042/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12043/// mask bit is not set).
12044///
12045/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
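///
/// An illustrative masking sketch (requires the unstable `stdarch_x86_avx512_f16` and `f16`
/// features on nightly plus AVX512-FP16 hardware; shown as an `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Even lanes take the converted values, odd lanes keep the corresponding lane of `src`.
/// let a = _mm512_set1_epi32(7);
/// let src = _mm256_set1_ph(-1.0);
/// let r = _mm512_mask_cvtepi32_ph(src, 0b0101_0101_0101_0101, a);
/// ```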
12046#[inline]
12047#[target_feature(enable = "avx512fp16")]
12048#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12049#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12050pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) }
12052}
12053
12054/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12055/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12056///
12057/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
12058#[inline]
12059#[target_feature(enable = "avx512fp16")]
12060#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12061#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12062pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
    _mm512_mask_cvtepi32_ph(f16x16::ZERO.as_m256h(), k, a)
12064}
12065
12066/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12067/// and store the results in dst.
12068///
12069/// Rounding is done according to the rounding parameter, which can be one of:
12070///
12071/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12072/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12073/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12074/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12075/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12076///
12077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
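///
/// A sketch of selecting an explicit rounding mode (nightly-only, AVX512-FP16 hardware assumed,
/// hence an `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Truncate toward zero instead of using the current MXCSR rounding mode.
/// let a = _mm512_set1_epi32(33000);
/// let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
/// ```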
12078#[inline]
12079#[target_feature(enable = "avx512fp16")]
12080#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12081#[rustc_legacy_const_generics(1)]
12082#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12083pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12084 unsafe {
12085 static_assert_rounding!(ROUNDING);
12086 vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
12087 }
12088}
12089
12090/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12091/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12092/// mask bit is not set).
12093///
12094/// Rounding is done according to the rounding parameter, which can be one of:
12095///
12096/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12097/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12098/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12099/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12101///
12102/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
12103#[inline]
12104#[target_feature(enable = "avx512fp16")]
12105#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12106#[rustc_legacy_const_generics(3)]
12107#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12108pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
12109 src: __m256h,
12110 k: __mmask16,
12111 a: __m512i,
12112) -> __m256h {
12113 unsafe {
12114 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::<ROUNDING>(a), src)
12116 }
12117}
12118
12119/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12120/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12121///
12122/// Rounding is done according to the rounding parameter, which can be one of:
12123///
12124/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12125/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12126/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12127/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12128/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12129///
12130/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
12131#[inline]
12132#[target_feature(enable = "avx512fp16")]
12133#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12134#[rustc_legacy_const_generics(2)]
12135#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12136pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12137 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
12139}
12140
12141/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12142/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12143/// of dst.
12144///
12145/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
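///
/// A small illustration of the scalar insert behaviour (nightly `stdarch_x86_avx512_f16` and `f16`
/// features and AVX512-FP16 hardware assumed; not run as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // The low lane becomes 42.0; the remaining seven lanes are copied from `a`.
/// let a = _mm_set1_ph(1.0);
/// let r = _mm_cvti32_sh(a, 42);
/// ```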
12146#[inline]
12147#[target_feature(enable = "avx512fp16")]
12148#[cfg_attr(test, assert_instr(vcvtsi2sh))]
12149#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12150pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
12151 unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12152}
12153
12154/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12155/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12156/// of dst.
12157///
12158/// Rounding is done according to the rounding parameter, which can be one of:
12159///
12160/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12161/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12162/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12163/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12165///
12166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
12167#[inline]
12168#[target_feature(enable = "avx512fp16")]
12169#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
12170#[rustc_legacy_const_generics(2)]
12171#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12172pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
12173 unsafe {
12174 static_assert_rounding!(ROUNDING);
12175 vcvtsi2sh(a, b, ROUNDING)
12176 }
12177}
12178
12179/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12180/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12181///
12182/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
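///
/// A sketch contrasting the unsigned interpretation with `_mm_cvtepi32_ph` (nightly-only,
/// AVX512-FP16 plus AVX512-VL hardware assumed; `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // All-ones lanes are read as 4294967295 here, not as -1.
/// let a = _mm_set1_epi32(-1);
/// let r = _mm_cvtepu32_ph(a);
/// ```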
12183#[inline]
12184#[target_feature(enable = "avx512fp16,avx512vl")]
12185#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12186#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12187pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
12189}
12190
12191/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12192/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12193/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12194///
12195/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
12196#[inline]
12197#[target_feature(enable = "avx512fp16,avx512vl")]
12198#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12199#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12200pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12201 unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) }
12202}
12203
12204/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12205/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12206/// The upper 64 bits of dst are zeroed out.
12207///
12208/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
12209#[inline]
12210#[target_feature(enable = "avx512fp16,avx512vl")]
12211#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12212#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12213pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12215}
12216
12217/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12218/// and store the results in dst.
12219///
12220/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
12221#[inline]
12222#[target_feature(enable = "avx512fp16,avx512vl")]
12223#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12224#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12225pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
12226 unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) }
12227}
12228
12229/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12230/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12231/// mask bit is not set).
12232///
12233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
12234#[inline]
12235#[target_feature(enable = "avx512fp16,avx512vl")]
12236#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12237#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12238pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) }
12240}
12241
12242/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12243/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12244///
12245/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
12246#[inline]
12247#[target_feature(enable = "avx512fp16,avx512vl")]
12248#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12249#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12250pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
12252}
12253
12254/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12255/// and store the results in dst.
12256///
12257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
12258#[inline]
12259#[target_feature(enable = "avx512fp16")]
12260#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12261#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12262pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
12263 unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) }
12264}
12265
12266/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12267/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12268/// mask bit is not set).
12269///
12270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
12271#[inline]
12272#[target_feature(enable = "avx512fp16")]
12273#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12274#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12275pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) }
12277}
12278
12279/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12280/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12281///
12282/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
12283#[inline]
12284#[target_feature(enable = "avx512fp16")]
12285#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12287pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
    _mm512_mask_cvtepu32_ph(f16x16::ZERO.as_m256h(), k, a)
12289}
12290
12291/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12292/// and store the results in dst.
12293///
12294/// Rounding is done according to the rounding parameter, which can be one of:
12295///
12296/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12297/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12298/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12299/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12300/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12301///
12302/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
12303#[inline]
12304#[target_feature(enable = "avx512fp16")]
12305#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12306#[rustc_legacy_const_generics(1)]
12307#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12308pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12309 unsafe {
12310 static_assert_rounding!(ROUNDING);
12311 vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
12312 }
12313}
12314
12315/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12316/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12317/// mask bit is not set).
12318///
12319/// Rounding is done according to the rounding parameter, which can be one of:
12320///
12321/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12322/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12323/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12324/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12325/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12326///
12327/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
12328#[inline]
12329#[target_feature(enable = "avx512fp16")]
12330#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12331#[rustc_legacy_const_generics(3)]
12332#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12333pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
12334 src: __m256h,
12335 k: __mmask16,
12336 a: __m512i,
12337) -> __m256h {
12338 unsafe {
12339 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::<ROUNDING>(a), src)
12341 }
12342}
12343
12344/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12345/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12346///
12347/// Rounding is done according to the rounding parameter, which can be one of:
12348///
12349/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12350/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12351/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12352/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12353/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12354///
12355/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
12356#[inline]
12357#[target_feature(enable = "avx512fp16")]
12358#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12359#[rustc_legacy_const_generics(2)]
12360#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12361pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12362 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
12364}
12365
12366/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12367/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12368/// of dst.
12369///
12370/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
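///
/// An illustrative call (nightly `stdarch_x86_avx512_f16` and `f16` features and AVX512-FP16
/// hardware assumed; shown as an `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Insert the converted value 3.0 into the low lane, keeping the upper lanes of `a`.
/// let a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_cvtu32_sh(a, 3);
/// ```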
12371#[inline]
12372#[target_feature(enable = "avx512fp16")]
12373#[cfg_attr(test, assert_instr(vcvtusi2sh))]
12374#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12375pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
12376 unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12377}
12378
12379/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12380/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12381/// of dst.
12382///
12383/// Rounding is done according to the rounding parameter, which can be one of:
12384///
12385/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12386/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12387/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12388/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12389/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12390///
12391/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
12392#[inline]
12393#[target_feature(enable = "avx512fp16")]
12394#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
12395#[rustc_legacy_const_generics(2)]
12396#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12397pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
12398 unsafe {
12399 static_assert_rounding!(ROUNDING);
12400 vcvtusi2sh(a, b, ROUNDING)
12401 }
12402}
12403
12404/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12405/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12406///
12407/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
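///
/// A sketch of the narrow result layout (nightly-only, AVX512-FP16 plus AVX512-VL hardware
/// assumed; `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Only two source lanes exist, so only the low two f16 lanes are produced;
/// // the upper 96 bits of the result are zero.
/// let a = _mm_set1_epi64x(-5);
/// let r = _mm_cvtepi64_ph(a);
/// ```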
12408#[inline]
12409#[target_feature(enable = "avx512fp16,avx512vl")]
12410#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12411#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12412pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12414}
12415
12416/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12417/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12418/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12419///
12420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
12421#[inline]
12422#[target_feature(enable = "avx512fp16,avx512vl")]
12423#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12424#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12425pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12426 unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) }
12427}
12428
12429/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12430/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12431/// The upper 96 bits of dst are zeroed out.
12432///
12433/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
12434#[inline]
12435#[target_feature(enable = "avx512fp16,avx512vl")]
12436#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12437#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12438pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12440}
12441
12442/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12443/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12444///
12445/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
12446#[inline]
12447#[target_feature(enable = "avx512fp16,avx512vl")]
12448#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12449#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12450pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
12452}
12453
12454/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12455/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12456/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12457///
12458/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
12459#[inline]
12460#[target_feature(enable = "avx512fp16,avx512vl")]
12461#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12462#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12463pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12464 unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) }
12465}
12466
12467/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12468/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12469/// The upper 64 bits of dst are zeroed out.
12470///
12471/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
12472#[inline]
12473#[target_feature(enable = "avx512fp16,avx512vl")]
12474#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12475#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12476pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
12478}
12479
12480/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12481/// and store the results in dst.
12482///
12483/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
12484#[inline]
12485#[target_feature(enable = "avx512fp16")]
12486#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12487#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12488pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
12489 unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) }
12490}
12491
12492/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12493/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12494/// mask bit is not set).
12495///
12496/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
12497#[inline]
12498#[target_feature(enable = "avx512fp16")]
12499#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12500#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12501pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) }
12503}
12504
12505/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12506/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12507///
12508/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
12509#[inline]
12510#[target_feature(enable = "avx512fp16")]
12511#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12512#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12513pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
    _mm512_mask_cvtepi64_ph(f16x8::ZERO.as_m128h(), k, a)
12515}
12516
12517/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12518/// and store the results in dst.
12519///
12520/// Rounding is done according to the rounding parameter, which can be one of:
12521///
12522/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12523/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12524/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12525/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12526/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12527///
12528/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
12529#[inline]
12530#[target_feature(enable = "avx512fp16")]
12531#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12532#[rustc_legacy_const_generics(1)]
12533#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12534pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12535 unsafe {
12536 static_assert_rounding!(ROUNDING);
12537 vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
12538 }
12539}
12540
12541/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12542/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12543/// mask bit is not set).
12544///
12545/// Rounding is done according to the rounding parameter, which can be one of:
12546///
12547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12552///
12553/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
12554#[inline]
12555#[target_feature(enable = "avx512fp16")]
12556#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12557#[rustc_legacy_const_generics(3)]
12558#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12559pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
12560 src: __m128h,
12561 k: __mmask8,
12562 a: __m512i,
12563) -> __m128h {
12564 unsafe {
12565 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::<ROUNDING>(a), src)
12567 }
12568}
12569
12570/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12571/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12572///
12573/// Rounding is done according to the rounding parameter, which can be one of:
12574///
12575/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12576/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12577/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12578/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12579/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12580///
12581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
12582#[inline]
12583#[target_feature(enable = "avx512fp16")]
12584#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12585#[rustc_legacy_const_generics(2)]
12586#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12587pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12588 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
12590}
12591
12592/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12593/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12594///
12595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
12596#[inline]
12597#[target_feature(enable = "avx512fp16,avx512vl")]
12598#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12599#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12600pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12602}
12603
12604/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12605/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12606/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12607///
12608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
12609#[inline]
12610#[target_feature(enable = "avx512fp16,avx512vl")]
12611#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12612#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12613pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12614 unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) }
12615}
12616
12617/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12618/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12619/// The upper 96 bits of dst are zeroed out.
12620///
12621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
12622#[inline]
12623#[target_feature(enable = "avx512fp16,avx512vl")]
12624#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12626pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
    _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12628}
12629
12630/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12631/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12632///
12633/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
12634#[inline]
12635#[target_feature(enable = "avx512fp16,avx512vl")]
12636#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12637#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12638pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
12640}
12641
12642/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12643/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12644/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12645///
12646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
12647#[inline]
12648#[target_feature(enable = "avx512fp16,avx512vl")]
12649#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12650#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12651pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12652 unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) }
12653}
12654
12655/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12656/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12657/// The upper 64 bits of dst are zeroed out.
12658///
12659/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
12660#[inline]
12661#[target_feature(enable = "avx512fp16,avx512vl")]
12662#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12663#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12664pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
    _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
12666}
12667
12668/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12669/// and store the results in dst.
12670///
12671/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
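///
/// A brief sketch (nightly `stdarch_x86_avx512_f16` feature and AVX512-FP16 hardware assumed;
/// not compiled as a doctest):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Eight u64 lanes narrow to eight f16 lanes, filling the whole __m128h result.
/// let a = _mm512_set1_epi64(1024);
/// let r = _mm512_cvtepu64_ph(a);
/// ```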
12672#[inline]
12673#[target_feature(enable = "avx512fp16")]
12674#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12675#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12676pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
12677 unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) }
12678}
12679
12680/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12681/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12682/// mask bit is not set).
12683///
12684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
12685#[inline]
12686#[target_feature(enable = "avx512fp16")]
12687#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12688#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12689pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
    unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) }
12691}
12692
12693/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12694/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12695///
12696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
12697#[inline]
12698#[target_feature(enable = "avx512fp16")]
12699#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12700#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12701pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
    _mm512_mask_cvtepu64_ph(f16x8::ZERO.as_m128h(), k, a)
12703}
12704
12705/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12706/// and store the results in dst.
12707///
12708/// Rounding is done according to the rounding parameter, which can be one of:
12709///
12710/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12711/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12712/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12713/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12714/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12715///
12716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
12717#[inline]
12718#[target_feature(enable = "avx512fp16")]
12719#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12720#[rustc_legacy_const_generics(1)]
12721#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12722pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12723 unsafe {
12724 static_assert_rounding!(ROUNDING);
12725 vcvtuqq2ph_512(a.as_u64x8(), ROUNDING)
12726 }
12727}
12728
12729/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12730/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12731/// mask bit is not set).
12732///
12733/// Rounding is done according to the rounding parameter, which can be one of:
12734///
12735/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12736/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12737/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12738/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12739/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12740///
12741/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
12742#[inline]
12743#[target_feature(enable = "avx512fp16")]
12744#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12745#[rustc_legacy_const_generics(3)]
12746#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12747pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
12748 src: __m128h,
12749 k: __mmask8,
12750 a: __m512i,
12751) -> __m128h {
12752 unsafe {
12753 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::<ROUNDING>(a), src)
12755 }
12756}
12757
12758/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12759/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12760///
12761/// Rounding is done according to the rounding parameter, which can be one of:
12762///
12763/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12764/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12765/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12766/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12767/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12768///
12769/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
12770#[inline]
12771#[target_feature(enable = "avx512fp16")]
12772#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12773#[rustc_legacy_const_generics(2)]
12774#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12775pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12776 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
12778}
12779
12780/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
12782///
12783/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
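///
/// A minimal narrowing sketch (nightly-only, AVX512-FP16 plus AVX512-VL hardware assumed;
/// `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Four f32 lanes narrow to four f16 lanes in the low 64 bits of the result.
/// let a = _mm_setr_ps(0.5, 1.5, 2.5, 3.5);
/// let r = _mm_cvtxps_ph(a);
/// ```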
12784#[inline]
12785#[target_feature(enable = "avx512fp16,avx512vl")]
12786#[cfg_attr(test, assert_instr(vcvtps2phx))]
12787#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12788pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12790}
12791
12792/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12793/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12794/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12795///
12796/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
12797#[inline]
12798#[target_feature(enable = "avx512fp16,avx512vl")]
12799#[cfg_attr(test, assert_instr(vcvtps2phx))]
12800#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12801pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
12802 unsafe { vcvtps2phx_128(a, src, k) }
12803}
12804
12805/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12806/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12807/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12808///
12809/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
12810#[inline]
12811#[target_feature(enable = "avx512fp16,avx512vl")]
12812#[cfg_attr(test, assert_instr(vcvtps2phx))]
12813#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12814pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
    _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
12816}
12817
12818/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12819/// floating-point elements, and store the results in dst.
12820///
12821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
12822#[inline]
12823#[target_feature(enable = "avx512fp16,avx512vl")]
12824#[cfg_attr(test, assert_instr(vcvtps2phx))]
12825#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12826pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
    _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
12828}
12829
12830/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12831/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12832/// when the corresponding mask bit is not set).
12833///
12834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
12835#[inline]
12836#[target_feature(enable = "avx512fp16,avx512vl")]
12837#[cfg_attr(test, assert_instr(vcvtps2phx))]
12838#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12839pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
12840 unsafe { vcvtps2phx_256(a, src, k) }
12841}
12842
12843/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12844/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12845/// corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
12848#[inline]
12849#[target_feature(enable = "avx512fp16,avx512vl")]
12850#[cfg_attr(test, assert_instr(vcvtps2phx))]
12851#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12852pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
    _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
12854}
12855
12856/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12857/// floating-point elements, and store the results in dst.
12858///
12859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
12860#[inline]
12861#[target_feature(enable = "avx512fp16")]
12862#[cfg_attr(test, assert_instr(vcvtps2phx))]
12863#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12864pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
    _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), 0xffff, a)
12866}
12867
12868/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12869/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12870/// when the corresponding mask bit is not set).
12871///
12872/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
12873#[inline]
12874#[target_feature(enable = "avx512fp16")]
12875#[cfg_attr(test, assert_instr(vcvtps2phx))]
12876#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12877pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
12878 unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
12879}
12880
12881/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12882/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12883/// corresponding mask bit is not set).
12884///
12885/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
12886#[inline]
12887#[target_feature(enable = "avx512fp16")]
12888#[cfg_attr(test, assert_instr(vcvtps2phx))]
12889#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12890pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
    _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), k, a)
12892}
12893
12894/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12895/// floating-point elements, and store the results in dst.
12896///
12897/// Rounding is done according to the rounding parameter, which can be one of:
12898///
12899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12904///
12905/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
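///
/// A sketch of narrowing with an explicit rounding mode (nightly `stdarch_x86_avx512_f16` feature
/// and AVX512-FP16 hardware assumed; shown as an `ignore` block):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Narrow sixteen f32 lanes to f16, rounding toward positive infinity.
/// let a = _mm512_set1_ps(1.0 / 3.0);
/// let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// ```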
12906#[inline]
12907#[target_feature(enable = "avx512fp16")]
12908#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
12909#[rustc_legacy_const_generics(1)]
12910#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12911pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
12912 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), 0xffff, a)
12914}
12915
12916/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12917/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
12918/// when the corresponding mask bit is not set).
12919///
12920/// Rounding is done according to the rounding parameter, which can be one of:
12921///
12922/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12923/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12924/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12925/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12926/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12927///
12928/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
12929#[inline]
12930#[target_feature(enable = "avx512fp16")]
12931#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
12932#[rustc_legacy_const_generics(3)]
12933#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12934pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
12935 src: __m256h,
12936 k: __mmask16,
12937 a: __m512,
12938) -> __m256h {
12939 unsafe {
12940 static_assert_rounding!(ROUNDING);
12941 vcvtps2phx_512(a, src, k, ROUNDING)
12942 }
12943}
12944
12945/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12946/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12947/// corresponding mask bit is not set).
12948///
12949/// Rounding is done according to the rounding parameter, which can be one of:
12950///
12951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12956///
12957/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
12958#[inline]
12959#[target_feature(enable = "avx512fp16")]
12960#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
12961#[rustc_legacy_const_generics(2)]
12962#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12963pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
12964 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
12966}
12967
12968/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
12970/// elements from a to the upper elements of dst.
12971///
12972/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
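///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set_ss(1.5);
///     // Lane 0 becomes 1.5 as an f16; lanes 1..=7 are copied from `a`.
///     _mm_cvtss_sh(a, b)
/// }
/// ```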
12973#[inline]
12974#[target_feature(enable = "avx512fp16")]
12975#[cfg_attr(test, assert_instr(vcvtss2sh))]
12976#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12977pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
    _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
12979}
12980
12981/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
12984/// upper elements of dst.
12985///
12986/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
12987#[inline]
12988#[target_feature(enable = "avx512fp16")]
12989#[cfg_attr(test, assert_instr(vcvtss2sh))]
12990#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12991pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
12992 unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
12993}
12994
12995/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
12997/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
12998/// elements of dst.
12999///
13000/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
13001#[inline]
13002#[target_feature(enable = "avx512fp16")]
13003#[cfg_attr(test, assert_instr(vcvtss2sh))]
13004#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13005pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
    _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), k, a, b)
13007}
13008
13009/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13011/// elements from a to the upper elements of dst.
13012///
13013/// Rounding is done according to the rounding parameter, which can be one of:
13014///
13015/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13016/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13017/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13018/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13019/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13020///
13021/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
13022#[inline]
13023#[target_feature(enable = "avx512fp16")]
13024#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13025#[rustc_legacy_const_generics(2)]
13026#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13027pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
13028 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
13030}
13031
13032/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13035/// upper elements of dst.
13036///
13037/// Rounding is done according to the rounding parameter, which can be one of:
13038///
13039/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13040/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13041/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13042/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13043/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13044///
13045/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
13046#[inline]
13047#[target_feature(enable = "avx512fp16")]
13048#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13049#[rustc_legacy_const_generics(4)]
13050#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13051pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
13052 src: __m128h,
13053 k: __mmask8,
13054 a: __m128h,
13055 b: __m128,
13056) -> __m128h {
13057 unsafe {
13058 static_assert_rounding!(ROUNDING);
13059 vcvtss2sh(a, b, src, k, ROUNDING)
13060 }
13061}
13062
13063/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13065/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13066/// elements of dst.
13067///
13068/// Rounding is done according to the rounding parameter, which can be one of:
13069///
13070/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13071/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13072/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13073/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13074/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13075///
13076/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
13077#[inline]
13078#[target_feature(enable = "avx512fp16")]
13079#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13080#[rustc_legacy_const_generics(3)]
13081#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13082pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
13083 k: __mmask8,
13084 a: __m128h,
13085 b: __m128,
13086) -> __m128h {
13087 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
13089}
13090
13091/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13092/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out.
13093///
13094/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
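///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_pd(0.25);
///     // Lanes 0 and 1 hold 0.25 as f16; the remaining six lanes are zero.
///     _mm_cvtpd_ph(a)
/// }
/// ```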
13095#[inline]
13096#[target_feature(enable = "avx512fp16,avx512vl")]
13097#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13098#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13099pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13101}
13102
13103/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13104/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13105/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13106///
13107/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
13108#[inline]
13109#[target_feature(enable = "avx512fp16,avx512vl")]
13110#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13111#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13112pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
13113 unsafe { vcvtpd2ph_128(a, src, k) }
13114}
13115
13116/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13117/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13118/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13119///
13120/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
13121#[inline]
13122#[target_feature(enable = "avx512fp16,avx512vl")]
13123#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13124#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13125pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
    _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13127}
13128
13129/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13130/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
13131///
13132/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
13133#[inline]
13134#[target_feature(enable = "avx512fp16,avx512vl")]
13135#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13136#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13137pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
13139}
13140
13141/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13142/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13143/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13144///
13145/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
13146#[inline]
13147#[target_feature(enable = "avx512fp16,avx512vl")]
13148#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13149#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13150pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
13151 unsafe { vcvtpd2ph_256(a, src, k) }
13152}
13153
13154/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13155/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13156/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13157///
13158/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
13159#[inline]
13160#[target_feature(enable = "avx512fp16,avx512vl")]
13161#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13163pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
    _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
13165}
13166
13167/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13168/// floating-point elements, and store the results in dst.
13169///
13170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
13171#[inline]
13172#[target_feature(enable = "avx512fp16")]
13173#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13174#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13175pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), 0xff, a)
13177}
13178
13179/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13180/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13181/// when the corresponding mask bit is not set).
13182///
13183/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
13184#[inline]
13185#[target_feature(enable = "avx512fp16")]
13186#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13187#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13188pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
13189 unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
13190}
13191
13192/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13193/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13194/// corresponding mask bit is not set).
13195///
13196/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
13197#[inline]
13198#[target_feature(enable = "avx512fp16")]
13199#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13200#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13201pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
    _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), k, a)
13203}
13204
13205/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13206/// floating-point elements, and store the results in dst.
13207///
13208/// Rounding is done according to the rounding parameter, which can be one of:
13209///
13210/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13211/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13212/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13213/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13214/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13215///
13216/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
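///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm512_set1_pd(1.0 / 3.0);
///     // Convert the eight doubles, rounding toward negative infinity with exceptions suppressed.
///     _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```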
13217#[inline]
13218#[target_feature(enable = "avx512fp16")]
13219#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13220#[rustc_legacy_const_generics(1)]
13221#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13222pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
13223 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a)
13225}
13226
13227/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13228/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
13229/// when the corresponding mask bit is not set).
13230///
13231/// Rounding is done according to the rounding parameter, which can be one of:
13232///
13233/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13234/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13235/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13236/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13237/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13238///
13239/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
13240#[inline]
13241#[target_feature(enable = "avx512fp16")]
13242#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13243#[rustc_legacy_const_generics(3)]
13244#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13245pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
13246 src: __m128h,
13247 k: __mmask8,
13248 a: __m512d,
13249) -> __m128h {
13250 unsafe {
13251 static_assert_rounding!(ROUNDING);
13252 vcvtpd2ph_512(a, src, k, ROUNDING)
13253 }
13254}
13255
13256/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13257/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13258/// corresponding mask bit is not set).
13259///
13260/// Rounding is done according to the rounding parameter, which can be one of:
13261///
13262/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13263/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13264/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13265/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13267///
13268/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
13269#[inline]
13270#[target_feature(enable = "avx512fp16")]
13271#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13272#[rustc_legacy_const_generics(2)]
13273#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13274pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
13275 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
13277}
13278
13279/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13281/// elements from a to the upper elements of dst.
13282///
13283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
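///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m128h {
///     let a = _mm_set1_ph(2.0);
///     let b = _mm_set_sd(0.75);
///     // Lane 0 becomes 0.75 as an f16; lanes 1..=7 are copied from `a`.
///     _mm_cvtsd_sh(a, b)
/// }
/// ```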
13284#[inline]
13285#[target_feature(enable = "avx512fp16")]
13286#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13287#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13288pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
13290}
13291
13292/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13295/// upper elements of dst.
13296///
13297/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
13298#[inline]
13299#[target_feature(enable = "avx512fp16")]
13300#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13301#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13302pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
13303 unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
13304}
13305
13306/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13308/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13309/// elements of dst.
13310///
13311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
13312#[inline]
13313#[target_feature(enable = "avx512fp16")]
13314#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13315#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13316pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
    _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), k, a, b)
13318}
13319
13320/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst, and copy the upper 7 packed
13322/// elements from a to the upper elements of dst.
13323///
13324/// Rounding is done according to the rounding parameter, which can be one of:
13325///
13326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13331///
13332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
13333#[inline]
13334#[target_feature(enable = "avx512fp16")]
13335#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13336#[rustc_legacy_const_generics(2)]
13337#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13338pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
13339 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
13341}
13342
13343/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
13346/// upper elements of dst.
13347///
13348/// Rounding is done according to the rounding parameter, which can be one of:
13349///
13350/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13351/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13352/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13353/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13354/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13355///
13356/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
13357#[inline]
13358#[target_feature(enable = "avx512fp16")]
13359#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13360#[rustc_legacy_const_generics(4)]
13361#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13362pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
13363 src: __m128h,
13364 k: __mmask8,
13365 a: __m128h,
13366 b: __m128d,
13367) -> __m128h {
13368 unsafe {
13369 static_assert_rounding!(ROUNDING);
13370 vcvtsd2sh(a, b, src, k, ROUNDING)
13371 }
13372}
13373
13374/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using zeromask k (the element
13376/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13377/// elements of dst.
13378///
13379/// Rounding is done according to the rounding parameter, which can be one of:
13380///
13381/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13382/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13383/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13384/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13385/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13386///
13387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
13388#[inline]
13389#[target_feature(enable = "avx512fp16")]
13390#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13391#[rustc_legacy_const_generics(3)]
13392#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13393pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
13394 k: __mmask8,
13395 a: __m128h,
13396 b: __m128d,
13397) -> __m128h {
13398 static_assert_rounding!(ROUNDING);
    _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
13400}
13401
13402/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13403/// store the results in dst.
13404///
13405/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
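///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128i {
///     let a = _mm_set1_ph(2.5);
///     // Rounds according to MXCSR (round-to-nearest-even by default), so each lane holds 2.
///     _mm_cvtph_epi16(a)
/// }
/// ```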
13406#[inline]
13407#[target_feature(enable = "avx512fp16,avx512vl")]
13408#[cfg_attr(test, assert_instr(vcvtph2w))]
13409#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13410pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
13412}
13413
13414/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13415/// store the results in dst using writemask k (elements are copied from src when the corresponding
13416/// mask bit is not set).
13417///
13418/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
13419#[inline]
13420#[target_feature(enable = "avx512fp16,avx512vl")]
13421#[cfg_attr(test, assert_instr(vcvtph2w))]
13422#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13423pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) }
13425}
13426
13427/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13428/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13429///
13430/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
13431#[inline]
13432#[target_feature(enable = "avx512fp16,avx512vl")]
13433#[cfg_attr(test, assert_instr(vcvtph2w))]
13434#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13435pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
13437}
13438
13439/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13440/// store the results in dst.
13441///
13442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
13443#[inline]
13444#[target_feature(enable = "avx512fp16,avx512vl")]
13445#[cfg_attr(test, assert_instr(vcvtph2w))]
13446#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13447pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
13449}
13450
13451/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13452/// store the results in dst using writemask k (elements are copied from src when the corresponding
13453/// mask bit is not set).
13454///
13455/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
13456#[inline]
13457#[target_feature(enable = "avx512fp16,avx512vl")]
13458#[cfg_attr(test, assert_instr(vcvtph2w))]
13459#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13460pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) }
13462}
13463
13464/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13465/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13466///
13467/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
13468#[inline]
13469#[target_feature(enable = "avx512fp16,avx512vl")]
13470#[cfg_attr(test, assert_instr(vcvtph2w))]
13471#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13472pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
13474}
13475
13476/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13477/// store the results in dst.
13478///
13479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
13480#[inline]
13481#[target_feature(enable = "avx512fp16")]
13482#[cfg_attr(test, assert_instr(vcvtph2w))]
13483#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13484pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13486}
13487
13488/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13489/// store the results in dst using writemask k (elements are copied from src when the corresponding
13490/// mask bit is not set).
13491///
13492/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
13493#[inline]
13494#[target_feature(enable = "avx512fp16")]
13495#[cfg_attr(test, assert_instr(vcvtph2w))]
13496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13497pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13498 unsafe {
        transmute(vcvtph2w_512(
13500 a,
13501 src.as_i16x32(),
13502 k,
13503 _MM_FROUND_CUR_DIRECTION,
13504 ))
13505 }
13506}
13507
13508/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13509/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13510///
13511/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
13512#[inline]
13513#[target_feature(enable = "avx512fp16")]
13514#[cfg_attr(test, assert_instr(vcvtph2w))]
13515#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13516pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
13518}
13519
13520/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13521/// store the results in dst.
13522///
13523/// Rounding is done according to the rounding parameter, which can be one of:
13524///
13525/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13526/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13527/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13528/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13529/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13530///
13531/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
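///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_ph(2.5);
///     // Round up, so every 16-bit lane holds 3.
///     _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```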
13532#[inline]
13533#[target_feature(enable = "avx512fp16")]
13534#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13535#[rustc_legacy_const_generics(1)]
13536#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13537pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
13538 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
13540}
13541
13542/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13543/// store the results in dst using writemask k (elements are copied from src when the corresponding
13544/// mask bit is not set).
13545///
13546/// Rounding is done according to the rounding parameter, which can be one of:
13547///
13548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13553///
13554/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
13555#[inline]
13556#[target_feature(enable = "avx512fp16")]
13557#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13558#[rustc_legacy_const_generics(3)]
13559#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13560pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
13561 src: __m512i,
13562 k: __mmask32,
13563 a: __m512h,
13564) -> __m512i {
13565 unsafe {
13566 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
13568 }
13569}
13570
13571/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13572/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13573///
13574/// Rounding is done according to the rounding parameter, which can be one of:
13575///
13576/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13577/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13578/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13579/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13580/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13581///
13582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
13583#[inline]
13584#[target_feature(enable = "avx512fp16")]
13585#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13586#[rustc_legacy_const_generics(2)]
13587#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13588pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
13589 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
13591}
13592
13593/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13594/// and store the results in dst.
13595///
13596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
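///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128i {
///     let a = _mm_set1_ph(3.0);
///     // Each unsigned 16-bit lane holds 3.
///     _mm_cvtph_epu16(a)
/// }
/// ```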
13597#[inline]
13598#[target_feature(enable = "avx512fp16,avx512vl")]
13599#[cfg_attr(test, assert_instr(vcvtph2uw))]
13600#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13601pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
13603}
13604
13605/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13606/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13607/// mask bit is not set).
13608///
13609/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
13610#[inline]
13611#[target_feature(enable = "avx512fp16,avx512vl")]
13612#[cfg_attr(test, assert_instr(vcvtph2uw))]
13613#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13614pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) }
13616}
13617
13618/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13619/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13620///
13621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
13622#[inline]
13623#[target_feature(enable = "avx512fp16,avx512vl")]
13624#[cfg_attr(test, assert_instr(vcvtph2uw))]
13625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13626pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
13628}
13629
13630/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13631/// and store the results in dst.
13632///
13633/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
13634#[inline]
13635#[target_feature(enable = "avx512fp16,avx512vl")]
13636#[cfg_attr(test, assert_instr(vcvtph2uw))]
13637#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13638pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
13640}
13641
13642/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13643/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13644/// mask bit is not set).
13645///
13646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
13647#[inline]
13648#[target_feature(enable = "avx512fp16,avx512vl")]
13649#[cfg_attr(test, assert_instr(vcvtph2uw))]
13650#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13651pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) }
13653}
13654
13655/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13656/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13657///
13658/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
13659#[inline]
13660#[target_feature(enable = "avx512fp16,avx512vl")]
13661#[cfg_attr(test, assert_instr(vcvtph2uw))]
13662#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13663pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
13665}
13666
13667/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13668/// and store the results in dst.
13669///
13670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
13671#[inline]
13672#[target_feature(enable = "avx512fp16")]
13673#[cfg_attr(test, assert_instr(vcvtph2uw))]
13674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13675pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
13677}
13678
13679/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13680/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13681/// mask bit is not set).
13682///
13683/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
13684#[inline]
13685#[target_feature(enable = "avx512fp16")]
13686#[cfg_attr(test, assert_instr(vcvtph2uw))]
13687#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13688pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13689 unsafe {
        transmute(vcvtph2uw_512(
13691 a,
13692 src.as_u16x32(),
13693 k,
13694 _MM_FROUND_CUR_DIRECTION,
13695 ))
13696 }
13697}
13698
13699/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13700/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13701///
13702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
13703#[inline]
13704#[target_feature(enable = "avx512fp16")]
13705#[cfg_attr(test, assert_instr(vcvtph2uw))]
13706#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13707pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
13709}
13710
13711/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13712/// and store the results in dst.
13713///
13714/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13715///
13716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
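///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_ph(7.0);
///     // Convert with floating-point exceptions suppressed; each lane holds 7.
///     _mm512_cvt_roundph_epu16::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```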
13717#[inline]
13718#[target_feature(enable = "avx512fp16")]
13719#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
13720#[rustc_legacy_const_generics(1)]
13721#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13722pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
13723 static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
13725}
13726
13727/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13728/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13729/// mask bit is not set).
13730///
13731/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13732///
13733/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
13734#[inline]
13735#[target_feature(enable = "avx512fp16")]
13736#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
13737#[rustc_legacy_const_generics(3)]
13738#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13739pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
13740 src: __m512i,
13741 k: __mmask32,
13742 a: __m512h,
13743) -> __m512i {
13744 unsafe {
13745 static_assert_sae!(SAE);
        transmute(vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
13747 }
13748}
13749
13750/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13751/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13752///
13753/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13754///
13755/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
13756#[inline]
13757#[target_feature(enable = "avx512fp16")]
13758#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
13759#[rustc_legacy_const_generics(2)]
13760#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13761pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
13762 static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
13764}
13765
13766/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13767/// truncation, and store the results in dst.
13768///
13769/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
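///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128i {
///     let a = _mm_set1_ph(-1.75);
///     // Truncation drops the fraction, so each lane holds -1 rather than -2.
///     _mm_cvttph_epi16(a)
/// }
/// ```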
13770#[inline]
13771#[target_feature(enable = "avx512fp16,avx512vl")]
13772#[cfg_attr(test, assert_instr(vcvttph2w))]
13773#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13774pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
13776}
13777
13778/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13779/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13780/// mask bit is not set).
13781///
13782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
13783#[inline]
13784#[target_feature(enable = "avx512fp16,avx512vl")]
13785#[cfg_attr(test, assert_instr(vcvttph2w))]
13786#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13787pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) }
13789}
13790
13791/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13792/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13793/// mask bit is not set).
13794///
13795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
13796#[inline]
13797#[target_feature(enable = "avx512fp16,avx512vl")]
13798#[cfg_attr(test, assert_instr(vcvttph2w))]
13799#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13800pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
13802}
13803
13804/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13805/// truncation, and store the results in dst.
13806///
13807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
13808#[inline]
13809#[target_feature(enable = "avx512fp16,avx512vl")]
13810#[cfg_attr(test, assert_instr(vcvttph2w))]
13811#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13812pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
13814}
13815
13816/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13817/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13818/// mask bit is not set).
13819///
13820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
13821#[inline]
13822#[target_feature(enable = "avx512fp16,avx512vl")]
13823#[cfg_attr(test, assert_instr(vcvttph2w))]
13824#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13825pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) }
13827}
13828
13829/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13830/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13831/// mask bit is not set).
13832///
13833/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
13834#[inline]
13835#[target_feature(enable = "avx512fp16,avx512vl")]
13836#[cfg_attr(test, assert_instr(vcvttph2w))]
13837#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13838pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
13840}
13841
13842/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13843/// truncation, and store the results in dst.
13844///
13845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
13846#[inline]
13847#[target_feature(enable = "avx512fp16")]
13848#[cfg_attr(test, assert_instr(vcvttph2w))]
13849#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13850pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
13852}
13853
13854/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13855/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13856/// mask bit is not set).
13857///
13858/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
13859#[inline]
13860#[target_feature(enable = "avx512fp16")]
13861#[cfg_attr(test, assert_instr(vcvttph2w))]
13862#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13863pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13864 unsafe {
        transmute(vcvttph2w_512(
13866 a,
13867 src.as_i16x32(),
13868 k,
13869 _MM_FROUND_CUR_DIRECTION,
13870 ))
13871 }
13872}
13873
13874/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13875/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13876/// mask bit is not set).
13877///
13878/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
13879#[inline]
13880#[target_feature(enable = "avx512fp16")]
13881#[cfg_attr(test, assert_instr(vcvttph2w))]
13882#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13883pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
13885}
13886
13887/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13888/// truncation, and store the results in dst.
13889///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13891///
13892/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
13893#[inline]
13894#[target_feature(enable = "avx512fp16")]
13895#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13896#[rustc_legacy_const_generics(1)]
13897#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13898pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
13899 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
13901}
13902
13903/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13904/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13905/// mask bit is not set).
13906///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13908///
13909/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
13910#[inline]
13911#[target_feature(enable = "avx512fp16")]
13912#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13913#[rustc_legacy_const_generics(3)]
13914#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13915pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
13916 src: __m512i,
13917 k: __mmask32,
13918 a: __m512h,
13919) -> __m512i {
13920 unsafe {
13921 static_assert_sae!(SAE);
        transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
13923 }
13924}
13925
13926/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13927/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13928/// mask bit is not set).
13929///
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13931///
13932/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
13933#[inline]
13934#[target_feature(enable = "avx512fp16")]
13935#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13936#[rustc_legacy_const_generics(2)]
13937#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13938pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
13939 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
13941}
13942
13943/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13944/// truncation, and store the results in dst.
13945///
13946/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
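///
/// A minimal usage sketch (not part of Intel's documentation; the `demo` helper is
/// hypothetical and assumes `avx512fp16` and `avx512vl` support):
///
/// ```ignore
/// #[target_feature(enable = "avx512fp16,avx512vl")]
/// fn demo() -> __m128i {
///     let a = _mm_set1_ph(9.75);
///     // Truncation drops the fraction, so each unsigned 16-bit lane holds 9.
///     _mm_cvttph_epu16(a)
/// }
/// ```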
13947#[inline]
13948#[target_feature(enable = "avx512fp16,avx512vl")]
13949#[cfg_attr(test, assert_instr(vcvttph2uw))]
13950#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13951pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
13953}
13954
13955/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13956/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13957/// mask bit is not set).
13958///
13959/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
13960#[inline]
13961#[target_feature(enable = "avx512fp16,avx512vl")]
13962#[cfg_attr(test, assert_instr(vcvttph2uw))]
13963#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13964pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) }
13966}
13967
13968/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
13969/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13970/// mask bit is not set).
13971///
13972/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
13973#[inline]
13974#[target_feature(enable = "avx512fp16,avx512vl")]
13975#[cfg_attr(test, assert_instr(vcvttph2uw))]
13976#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13977pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
13978 _mm_mask_cvttph_epu16(src:_mm_setzero_si128(), k, a)
13979}
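
// Illustrative sketch (hypothetical helper, assuming "avx512fp16" and "avx512vl" have been
// verified at runtime): truncating conversion of eight f16 lanes to unsigned 16-bit integers,
// keeping only the lanes selected by `keep`.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn example_truncate_to_u16(keep: __mmask8, a: __m128h) -> __m128i {
    // Selected lanes are rounded toward zero; deselected lanes are zeroed out.
    _mm_maskz_cvttph_epu16(keep, a)
}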

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uw))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uw))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
    unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2uw))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
    _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uw))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uw))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
    unsafe {
        transmute(vcvttph2uw_512(
            a,
            src.as_u16x32(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uw))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
    _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512h,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
/// mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
    unsafe {
        transmute(vcvtph2dq_512(
            a,
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m256h,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
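
// Illustrative sketch (hypothetical helper): the same 16-lane f16 -> i32 conversion performed
// twice with different embedded rounding modes, showing how the ROUNDING const generic selects
// the behaviour independently of the MXCSR rounding-control field.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_round_down_vs_up(a: __m256h) -> (__m512i, __m512i) {
    // Round every element down (toward -inf) and up (toward +inf), suppressing exceptions.
    let down = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    (down, up)
}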

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
/// the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2si))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
    unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
/// the result in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vcvtsh2si32(a, ROUNDING)
    }
}
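
// Illustrative sketch (hypothetical helper): converting the lowest f16 lane to i32, once using
// the current MXCSR rounding mode and once forcing round-to-nearest with exceptions suppressed.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_scalar_to_i32(a: __m128h) -> (i32, i32) {
    let with_mxcsr = _mm_cvtsh_i32(a);
    let to_nearest = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    (with_mxcsr, to_nearest)
}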

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
    unsafe {
        transmute(vcvtph2udq_512(
            a,
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
}
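
// Illustrative sketch (hypothetical helper): widening f16 -> u32 conversion with writemask
// semantics, where masked-off lanes keep the values already present in `src` rather than
// being zeroed.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_merge_into_existing(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
    // Lanes with a clear bit in `k` are copied from `src`; the rest are converted from `a`.
    _mm512_mask_cvtph_epu32(src, k, a)
}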

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m256h,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
/// the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2usi))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
    unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
/// the result in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vcvtsh2usi32(a, ROUNDING)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
    _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
    unsafe {
        transmute(vcvttph2dq_512(
            a,
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
    _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst.
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m256h,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
/// the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2si))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
    unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
/// the result in dst.
///
/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        vcvttsh2si32(a, SAE)
    }
}
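
// Illustrative sketch (hypothetical helper): truncating scalar conversions of the lowest f16
// lane. The signed variant suppresses exceptions through SAE; the unsigned variant, defined
// later in this module, uses the default floating-point environment.
#[allow(dead_code)]
#[target_feature(enable = "avx512fp16")]
fn example_scalar_truncations(a: __m128h) -> (i32, u32) {
    let signed = _mm_cvtt_roundsh_i32::<{ _MM_FROUND_NO_EXC }>(a);
    let unsigned = _mm_cvttsh_u32(a);
    (signed, unsigned)
}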
14758
14759/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14760/// store the results in dst.
14761///
14762/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
14763#[inline]
14764#[target_feature(enable = "avx512fp16,avx512vl")]
14765#[cfg_attr(test, assert_instr(vcvttph2udq))]
14766#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14767pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
14768 _mm_mask_cvttph_epu32(src:_mm_undefined_si128(), k:0xff, a)
14769}
14770
14771/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14772/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14773///
14774/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
14775#[inline]
14776#[target_feature(enable = "avx512fp16,avx512vl")]
14777#[cfg_attr(test, assert_instr(vcvttph2udq))]
14778#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14779pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14780 unsafe { transmute(src:vcvttph2udq_128(a, src.as_u32x4(), k)) }
14781}
14782
14783/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14784/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14785///
14786/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
14787#[inline]
14788#[target_feature(enable = "avx512fp16,avx512vl")]
14789#[cfg_attr(test, assert_instr(vcvttph2udq))]
14790#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14791pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14792 _mm_mask_cvttph_epu32(src:_mm_setzero_si128(), k, a)
14793}
14794
14795/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14796/// store the results in dst.
14797///
14798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
14799#[inline]
14800#[target_feature(enable = "avx512fp16,avx512vl")]
14801#[cfg_attr(test, assert_instr(vcvttph2udq))]
14802#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14803pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
14804 _mm256_mask_cvttph_epu32(src:_mm256_undefined_si256(), k:0xff, a)
14805}
14806
14807/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14808/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14809///
14810/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
14811#[inline]
14812#[target_feature(enable = "avx512fp16,avx512vl")]
14813#[cfg_attr(test, assert_instr(vcvttph2udq))]
14814#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14815pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14816 unsafe { transmute(src:vcvttph2udq_256(a, src.as_u32x8(), k)) }
14817}
14818
14819/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14820/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14821///
14822/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
14823#[inline]
14824#[target_feature(enable = "avx512fp16,avx512vl")]
14825#[cfg_attr(test, assert_instr(vcvttph2udq))]
14826#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14827pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14828 _mm256_mask_cvttph_epu32(src:_mm256_setzero_si256(), k, a)
14829}
14830
14831/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14832/// store the results in dst.
14833///
14834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
14835#[inline]
14836#[target_feature(enable = "avx512fp16")]
14837#[cfg_attr(test, assert_instr(vcvttph2udq))]
14838#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14839pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
14840 _mm512_mask_cvttph_epu32(src:_mm512_undefined_epi32(), k:0xffff, a)
14841}
14842
14843/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14844/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14845///
14846/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
14847#[inline]
14848#[target_feature(enable = "avx512fp16")]
14849#[cfg_attr(test, assert_instr(vcvttph2udq))]
14850#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14851pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14852 unsafe {
14853 transmute(src:vcvttph2udq_512(
14854 a,
14855 src.as_u32x16(),
14856 k,
14857 _MM_FROUND_CUR_DIRECTION,
14858 ))
14859 }
14860}
14861
14862/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14863/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14864///
14865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
14866#[inline]
14867#[target_feature(enable = "avx512fp16")]
14868#[cfg_attr(test, assert_instr(vcvttph2udq))]
14869#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14870pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14871 _mm512_mask_cvttph_epu32(src:_mm512_setzero_si512(), k, a)
14872}
14873
14874/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14875/// store the results in dst.
14876///
14877/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14878///
14879/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
14880#[inline]
14881#[target_feature(enable = "avx512fp16")]
14882#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14883#[rustc_legacy_const_generics(1)]
14884#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14885pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
14886 static_assert_sae!(SAE);
14887 _mm512_mask_cvtt_roundph_epu32::<SAE>(src:_mm512_undefined_epi32(), k:0xffff, a)
14888}
14889
14890/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14891/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14892///
14893/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14894///
14895/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
14896#[inline]
14897#[target_feature(enable = "avx512fp16")]
14898#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14899#[rustc_legacy_const_generics(3)]
14900#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14901pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
14902 src: __m512i,
14903 k: __mmask16,
14904 a: __m256h,
14905) -> __m512i {
14906 unsafe {
14907 static_assert_sae!(SAE);
14908 transmute(src:vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
14909 }
14910}
14911
14912/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14913/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14914///
14915/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14916///
14917/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
14918#[inline]
14919#[target_feature(enable = "avx512fp16")]
14920#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14921#[rustc_legacy_const_generics(2)]
14922#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14923pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
14924 static_assert_sae!(SAE);
14925 _mm512_mask_cvtt_roundph_epu32::<SAE>(src:_mm512_setzero_si512(), k, a)
14926}
14927
14928/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
14929/// the result in dst.
14930///
14931/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
14932#[inline]
14933#[target_feature(enable = "avx512fp16")]
14934#[cfg_attr(test, assert_instr(vcvttsh2usi))]
14935#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14936pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
14937 unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
14938}
14939
14940/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
14941/// the result in dst.
14942///
14943/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14944///
14945/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
14946#[inline]
14947#[target_feature(enable = "avx512fp16")]
14948#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
14949#[rustc_legacy_const_generics(1)]
14950#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14951pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
14952 unsafe {
14953 static_assert_sae!(SAE);
14954 vcvttsh2usi32(a, SAE)
14955 }
14956}
14957
14958/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14959/// store the results in dst.
14960///
14961/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
14962#[inline]
14963#[target_feature(enable = "avx512fp16,avx512vl")]
14964#[cfg_attr(test, assert_instr(vcvtph2qq))]
14965#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14966pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
14967 _mm_mask_cvtph_epi64(src:_mm_undefined_si128(), k:0xff, a)
14968}
14969
14970/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14971/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14972///
14973/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
14974#[inline]
14975#[target_feature(enable = "avx512fp16,avx512vl")]
14976#[cfg_attr(test, assert_instr(vcvtph2qq))]
14977#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14978pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14979 unsafe { transmute(src:vcvtph2qq_128(a, src.as_i64x2(), k)) }
14980}
14981
14982/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14983/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14984///
14985/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
14986#[inline]
14987#[target_feature(enable = "avx512fp16,avx512vl")]
14988#[cfg_attr(test, assert_instr(vcvtph2qq))]
14989#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14990pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
14991 _mm_mask_cvtph_epi64(src:_mm_setzero_si128(), k, a)
14992}
14993
14994/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
14995/// store the results in dst.
14996///
14997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
14998#[inline]
14999#[target_feature(enable = "avx512fp16,avx512vl")]
15000#[cfg_attr(test, assert_instr(vcvtph2qq))]
15001#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15002pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
15003 _mm256_mask_cvtph_epi64(src:_mm256_undefined_si256(), k:0xff, a)
15004}
15005
15006/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15007/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15008///
15009/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
15010#[inline]
15011#[target_feature(enable = "avx512fp16,avx512vl")]
15012#[cfg_attr(test, assert_instr(vcvtph2qq))]
15013#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15014pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15015 unsafe { transmute(src:vcvtph2qq_256(a, src.as_i64x4(), k)) }
15016}
15017
15018/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15019/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15020///
15021/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
15022#[inline]
15023#[target_feature(enable = "avx512fp16,avx512vl")]
15024#[cfg_attr(test, assert_instr(vcvtph2qq))]
15025#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15026pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15027 _mm256_mask_cvtph_epi64(src:_mm256_setzero_si256(), k, a)
15028}
15029
15030/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15031/// store the results in dst.
15032///
15033/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
15034#[inline]
15035#[target_feature(enable = "avx512fp16")]
15036#[cfg_attr(test, assert_instr(vcvtph2qq))]
15037#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15038pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
    _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a)
15040}
15041
15042/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15043/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15044///
15045/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
15046#[inline]
15047#[target_feature(enable = "avx512fp16")]
15048#[cfg_attr(test, assert_instr(vcvtph2qq))]
15049#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15050pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15051 unsafe {
        transmute(vcvtph2qq_512(
15053 a,
15054 src.as_i64x8(),
15055 k,
15056 _MM_FROUND_CUR_DIRECTION,
15057 ))
15058 }
15059}
15060
15061/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15062/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15063///
15064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
15065#[inline]
15066#[target_feature(enable = "avx512fp16")]
15067#[cfg_attr(test, assert_instr(vcvtph2qq))]
15068#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15069pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
    _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a)
15071}
15072
15073/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15074/// store the results in dst.
15075///
15076/// Rounding is done according to the rounding parameter, which can be one of:
15077///
15078/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15079/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15080/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15081/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15082/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15083///
15084/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
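///
/// A minimal sketch of selecting an explicit rounding mode (hypothetical input values):
///
/// ```ignore
/// let a = _mm_set1_ph(2.5);
/// // Round 2.5 toward zero, so every converted lane becomes 2.
/// let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
/// ```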
15085#[inline]
15086#[target_feature(enable = "avx512fp16")]
15087#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15088#[rustc_legacy_const_generics(1)]
15089#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15090pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15091 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
15093}
15094
15095/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15096/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15097///
15098/// Rounding is done according to the rounding parameter, which can be one of:
15099///
15100/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15101/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15102/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15103/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15105///
15106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
15107#[inline]
15108#[target_feature(enable = "avx512fp16")]
15109#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15110#[rustc_legacy_const_generics(3)]
15111#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15112pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
15113 src: __m512i,
15114 k: __mmask8,
15115 a: __m128h,
15116) -> __m512i {
15117 unsafe {
15118 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15120 }
15121}
15122
15123/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15124/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15125///
15126/// Rounding is done according to the rounding parameter, which can be one of:
15127///
15128/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15129/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15130/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15131/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15132/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15133///
15134/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
15135#[inline]
15136#[target_feature(enable = "avx512fp16")]
15137#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15138#[rustc_legacy_const_generics(2)]
15139#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15140pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15141 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
15143}
15144
15145/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15146/// store the results in dst.
15147///
15148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
15149#[inline]
15150#[target_feature(enable = "avx512fp16,avx512vl")]
15151#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15152#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15153pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a)
15155}
15156
15157/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15158/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15159///
15160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
15161#[inline]
15162#[target_feature(enable = "avx512fp16,avx512vl")]
15163#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15164#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15165pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15167}
15168
15169/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15170/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15171///
15172/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
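///
/// A minimal sketch (hypothetical values): lanes whose mask bit is clear are zeroed.
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0);
/// let r = _mm_maskz_cvtph_epu64(0b10, a); // r == [0, 3]
/// ```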
15173#[inline]
15174#[target_feature(enable = "avx512fp16,avx512vl")]
15175#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15176#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15177pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a)
15179}
15180
15181/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15182/// store the results in dst.
15183///
15184/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
15185#[inline]
15186#[target_feature(enable = "avx512fp16,avx512vl")]
15187#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15188#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15189pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a)
15191}
15192
15193/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15194/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15195///
15196/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
15197#[inline]
15198#[target_feature(enable = "avx512fp16,avx512vl")]
15199#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15200#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15201pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15203}
15204
15205/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15206/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15207///
15208/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
15209#[inline]
15210#[target_feature(enable = "avx512fp16,avx512vl")]
15211#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15212#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15213pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a)
15215}
15216
15217/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15218/// store the results in dst.
15219///
15220/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
15221#[inline]
15222#[target_feature(enable = "avx512fp16")]
15223#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15224#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15225pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
    _mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a)
15227}
15228
15229/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15230/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15231///
15232/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
15233#[inline]
15234#[target_feature(enable = "avx512fp16")]
15235#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15236#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15237pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15238 unsafe {
        transmute(vcvtph2uqq_512(
15240 a,
15241 src.as_u64x8(),
15242 k,
15243 _MM_FROUND_CUR_DIRECTION,
15244 ))
15245 }
15246}
15247
15248/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15249/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15250///
15251/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
15252#[inline]
15253#[target_feature(enable = "avx512fp16")]
15254#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15255#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15256pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
    _mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a)
15258}
15259
15260/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15261/// store the results in dst.
15262///
15263/// Rounding is done according to the rounding parameter, which can be one of:
15264///
15265/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15266/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15267/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15268/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15269/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15270///
15271/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
15272#[inline]
15273#[target_feature(enable = "avx512fp16")]
15274#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15275#[rustc_legacy_const_generics(1)]
15276#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15277pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15278 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
15280}
15281
15282/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15283/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15284///
15285/// Rounding is done according to the rounding parameter, which can be one of:
15286///
15287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15292///
15293/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
15294#[inline]
15295#[target_feature(enable = "avx512fp16")]
15296#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15297#[rustc_legacy_const_generics(3)]
15298#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15299pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
15300 src: __m512i,
15301 k: __mmask8,
15302 a: __m128h,
15303) -> __m512i {
15304 unsafe {
15305 static_assert_rounding!(ROUNDING);
        transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15307 }
15308}
15309
15310/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15311/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15312///
15313/// Rounding is done according to the rounding parameter, which can be one of:
15314///
15315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15320///
15321/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
15322#[inline]
15323#[target_feature(enable = "avx512fp16")]
15324#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15325#[rustc_legacy_const_generics(2)]
15326#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15327pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15328 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
15330}
15331
15332/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15333/// store the results in dst.
15334///
15335/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
15336#[inline]
15337#[target_feature(enable = "avx512fp16,avx512vl")]
15338#[cfg_attr(test, assert_instr(vcvttph2qq))]
15339#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15340pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a)
15342}
15343
15344/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15345/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15346///
15347/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
15348#[inline]
15349#[target_feature(enable = "avx512fp16,avx512vl")]
15350#[cfg_attr(test, assert_instr(vcvttph2qq))]
15351#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15352pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) }
15354}
15355
15356/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15357/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15358///
15359/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
15360#[inline]
15361#[target_feature(enable = "avx512fp16,avx512vl")]
15362#[cfg_attr(test, assert_instr(vcvttph2qq))]
15363#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15364pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a)
15366}
15367
15368/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15369/// store the results in dst.
15370///
15371/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
15372#[inline]
15373#[target_feature(enable = "avx512fp16,avx512vl")]
15374#[cfg_attr(test, assert_instr(vcvttph2qq))]
15375#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15376pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a)
15378}
15379
15380/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15381/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15382///
15383/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
15384#[inline]
15385#[target_feature(enable = "avx512fp16,avx512vl")]
15386#[cfg_attr(test, assert_instr(vcvttph2qq))]
15387#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15388pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) }
15390}
15391
15392/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15393/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15394///
15395/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
15396#[inline]
15397#[target_feature(enable = "avx512fp16,avx512vl")]
15398#[cfg_attr(test, assert_instr(vcvttph2qq))]
15399#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15400pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a)
15402}
15403
15404/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15405/// store the results in dst.
15406///
15407/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
15408#[inline]
15409#[target_feature(enable = "avx512fp16")]
15410#[cfg_attr(test, assert_instr(vcvttph2qq))]
15411#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15412pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
    _mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a)
15414}
15415
15416/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15417/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15418///
15419/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
15420#[inline]
15421#[target_feature(enable = "avx512fp16")]
15422#[cfg_attr(test, assert_instr(vcvttph2qq))]
15423#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15424pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15425 unsafe {
        transmute(vcvttph2qq_512(
15427 a,
15428 src.as_i64x8(),
15429 k,
15430 _MM_FROUND_CUR_DIRECTION,
15431 ))
15432 }
15433}
15434
15435/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15436/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15437///
15438/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
15439#[inline]
15440#[target_feature(enable = "avx512fp16")]
15441#[cfg_attr(test, assert_instr(vcvttph2qq))]
15442#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15443pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
    _mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a)
15445}
15446
15447/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15448/// store the results in dst.
15449///
15450/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15451///
15452/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
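///
/// A minimal sketch (hypothetical input): `SAE` only controls exception suppression; the
/// conversion itself always truncates toward zero.
///
/// ```ignore
/// let a = _mm_set1_ph(-1.9);
/// let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a); // every lane is -1
/// ```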
15453#[inline]
15454#[target_feature(enable = "avx512fp16")]
15455#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15456#[rustc_legacy_const_generics(1)]
15457#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15458pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
15459 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
15461}
15462
15463/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15464/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15465///
15466/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15467///
15468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
15469#[inline]
15470#[target_feature(enable = "avx512fp16")]
15471#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15472#[rustc_legacy_const_generics(3)]
15473#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15474pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
15475 src: __m512i,
15476 k: __mmask8,
15477 a: __m128h,
15478) -> __m512i {
15479 unsafe {
15480 static_assert_sae!(SAE);
        transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
15482 }
15483}
15484
15485/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15486/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15487///
15488/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15489///
15490/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
15491#[inline]
15492#[target_feature(enable = "avx512fp16")]
15493#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15494#[rustc_legacy_const_generics(2)]
15495#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15496pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15497 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a)
15499}
15500
15501/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15502/// store the results in dst.
15503///
15504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
15505#[inline]
15506#[target_feature(enable = "avx512fp16,avx512vl")]
15507#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15508#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15509pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a)
15511}
15512
15513/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15514/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15515///
15516/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
15517#[inline]
15518#[target_feature(enable = "avx512fp16,avx512vl")]
15519#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15520#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15521pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
    unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) }
15523}
15524
15525/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15526/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15527///
15528/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
15529#[inline]
15530#[target_feature(enable = "avx512fp16,avx512vl")]
15531#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15532#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15533pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
    _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a)
15535}
15536
15537/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15538/// store the results in dst.
15539///
15540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
15541#[inline]
15542#[target_feature(enable = "avx512fp16,avx512vl")]
15543#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15544#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15545pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a)
15547}
15548
15549/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15550/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15551///
15552/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
15553#[inline]
15554#[target_feature(enable = "avx512fp16,avx512vl")]
15555#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15556#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15557pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
    unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) }
15559}
15560
15561/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15562/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15563///
15564/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
15565#[inline]
15566#[target_feature(enable = "avx512fp16,avx512vl")]
15567#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15568#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15569pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
    _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a)
15571}
15572
15573/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15574/// store the results in dst.
15575///
15576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
15577#[inline]
15578#[target_feature(enable = "avx512fp16")]
15579#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15580#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15581pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
    _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a)
15583}
15584
15585/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15586/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15587///
15588/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
15589#[inline]
15590#[target_feature(enable = "avx512fp16")]
15591#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15592#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15593pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15594 unsafe {
        transmute(vcvttph2uqq_512(
15596 a,
15597 src.as_u64x8(),
15598 k,
15599 _MM_FROUND_CUR_DIRECTION,
15600 ))
15601 }
15602}
15603
15604/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15605/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15606///
15607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
15608#[inline]
15609#[target_feature(enable = "avx512fp16")]
15610#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15611#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15612pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
    _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a)
15614}
15615
15616/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15617/// store the results in dst.
15618///
15619/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15620///
15621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
15622#[inline]
15623#[target_feature(enable = "avx512fp16")]
15624#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15625#[rustc_legacy_const_generics(1)]
15626#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15627pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
15628 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
15630}
15631
15632/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15633/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15634///
15635/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15636///
15637/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
15638#[inline]
15639#[target_feature(enable = "avx512fp16")]
15640#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15641#[rustc_legacy_const_generics(3)]
15642#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15643pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
15644 src: __m512i,
15645 k: __mmask8,
15646 a: __m128h,
15647) -> __m512i {
15648 unsafe {
15649 static_assert_sae!(SAE);
        transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15651 }
15652}
15653
15654/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15655/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15656///
15657/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15658///
15659/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
15660#[inline]
15661#[target_feature(enable = "avx512fp16")]
15662#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15663#[rustc_legacy_const_generics(2)]
15664#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15665pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15666 static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
15668}
15669
15670/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15671/// floating-point elements, and store the results in dst.
15672///
15673/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
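///
/// A minimal sketch (hypothetical values): the lower 4 half-precision elements of `a` are
/// widened to single precision.
///
/// ```ignore
/// let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0);
/// let r = _mm_cvtxph_ps(a); // r == [1.0, 2.0, 3.0, 4.0]
/// ```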
15674#[inline]
15675#[target_feature(enable = "avx512fp16,avx512vl")]
15676#[cfg_attr(test, assert_instr(vcvtph2psx))]
15677#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15678pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
    _mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a)
15680}
15681
15682/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15683/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15684/// dst when the corresponding mask bit is not set).
15685///
15686/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
15687#[inline]
15688#[target_feature(enable = "avx512fp16,avx512vl")]
15689#[cfg_attr(test, assert_instr(vcvtph2psx))]
15690#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15691pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
15692 unsafe { vcvtph2psx_128(a, src, k) }
15693}
15694
15695/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15696/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15697/// corresponding mask bit is not set).
15698///
15699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
15700#[inline]
15701#[target_feature(enable = "avx512fp16,avx512vl")]
15702#[cfg_attr(test, assert_instr(vcvtph2psx))]
15703#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15704pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
    _mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
15706}
15707
15708/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15709/// floating-point elements, and store the results in dst.
15710///
15711/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
15712#[inline]
15713#[target_feature(enable = "avx512fp16,avx512vl")]
15714#[cfg_attr(test, assert_instr(vcvtph2psx))]
15715#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15716pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
    _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
15718}
15719
15720/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15721/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15722/// dst when the corresponding mask bit is not set).
15723///
15724/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
15725#[inline]
15726#[target_feature(enable = "avx512fp16,avx512vl")]
15727#[cfg_attr(test, assert_instr(vcvtph2psx))]
15728#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15729pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
15730 unsafe { vcvtph2psx_256(a, src, k) }
15731}
15732
15733/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15734/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15735/// corresponding mask bit is not set).
15736///
15737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
15738#[inline]
15739#[target_feature(enable = "avx512fp16,avx512vl")]
15740#[cfg_attr(test, assert_instr(vcvtph2psx))]
15741#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15742pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
    _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
15744}
15745
15746/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15747/// floating-point elements, and store the results in dst.
15748///
15749/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
15750#[inline]
15751#[target_feature(enable = "avx512fp16")]
15752#[cfg_attr(test, assert_instr(vcvtph2psx))]
15753#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15754pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
    _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
15756}
15757
15758/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15759/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15760/// dst when the corresponding mask bit is not set).
15761///
15762/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
15763#[inline]
15764#[target_feature(enable = "avx512fp16")]
15765#[cfg_attr(test, assert_instr(vcvtph2psx))]
15766#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15767pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
15768 unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
15769}
15770
15771/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15772/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15773/// corresponding mask bit is not set).
15774///
15775/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
15776#[inline]
15777#[target_feature(enable = "avx512fp16")]
15778#[cfg_attr(test, assert_instr(vcvtph2psx))]
15779#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15780pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
    _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
15782}
15783
15784/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15785/// floating-point elements, and store the results in dst.
15786///
15787/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15788///
15789/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
15790#[inline]
15791#[target_feature(enable = "avx512fp16")]
15792#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15793#[rustc_legacy_const_generics(1)]
15794#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15795pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
15796 static_assert_sae!(SAE);
    _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
15798}
15799
15800/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15801/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15802/// dst when the corresponding mask bit is not set).
15803///
15804/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15805///
15806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
15807#[inline]
15808#[target_feature(enable = "avx512fp16")]
15809#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15810#[rustc_legacy_const_generics(3)]
15811#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15812pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
15813 src: __m512,
15814 k: __mmask16,
15815 a: __m256h,
15816) -> __m512 {
15817 unsafe {
15818 static_assert_sae!(SAE);
15819 vcvtph2psx_512(a, src, k, SAE)
15820 }
15821}
15822
15823/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15824/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15825/// corresponding mask bit is not set).
15826///
15827/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15828///
15829/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
15830#[inline]
15831#[target_feature(enable = "avx512fp16")]
15832#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15833#[rustc_legacy_const_generics(2)]
15834#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15835pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
15836 static_assert_sae!(SAE);
    _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
15838}
15839
15840/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15841/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed
15842/// elements from a to the upper elements of dst.
15843///
15844/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
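///
/// A minimal sketch (hypothetical values): only lane 0 is converted; lanes 1..3 come from `a`.
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// let b = _mm_set_sh(9.5);
/// let r = _mm_cvtsh_ss(a, b); // r == [9.5, 2.0, 3.0, 4.0]
/// ```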
15845#[inline]
15846#[target_feature(enable = "avx512fp16")]
15847#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15848#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15849pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
    _mm_mask_cvtsh_ss(a, 0xff, a, b)
15851}
15852
15853/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15854/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15855/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15856/// upper elements of dst.
15857///
15858/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
15859#[inline]
15860#[target_feature(enable = "avx512fp16")]
15861#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15862#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15863pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15864 unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
15865}
15866
15867/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15868/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15869/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15870/// of dst.
15871///
15872/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
15873#[inline]
15874#[target_feature(enable = "avx512fp16")]
15875#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15876#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15877pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
    _mm_mask_cvtsh_ss(_mm_set_ss(0.0), k, a, b)
15879}
15880
15881/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15882/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements
15883/// from a to the upper elements of dst.
15884///
15885/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15886///
15887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
15888#[inline]
15889#[target_feature(enable = "avx512fp16")]
15890#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15891#[rustc_legacy_const_generics(2)]
15892#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15893pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
15894 static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
15896}
15897
15898/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15899/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15900/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15901/// upper elements of dst.
15902///
15903/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15904///
15905/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
15906#[inline]
15907#[target_feature(enable = "avx512fp16")]
15908#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15909#[rustc_legacy_const_generics(4)]
15910#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15911pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
15912 src: __m128,
15913 k: __mmask8,
15914 a: __m128,
15915 b: __m128h,
15916) -> __m128 {
15917 unsafe {
15918 static_assert_sae!(SAE);
15919 vcvtsh2ss(a, b, src, k, SAE)
15920 }
15921}
15922
15923/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15924/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15925/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15926/// of dst.
15927///
15928/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15929///
15930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
15931#[inline]
15932#[target_feature(enable = "avx512fp16")]
15933#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15934#[rustc_legacy_const_generics(3)]
15935#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15936pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15937 static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_ss::<SAE>(_mm_set_ss(0.0), k, a, b)
15939}
15940
15941/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15942/// floating-point elements, and store the results in dst.
15943///
15944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
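///
/// A minimal sketch (hypothetical values): the lower 2 half-precision elements of `a` are
/// widened to double precision.
///
/// ```ignore
/// let a = _mm_set_sh(1.5); // element 0 = 1.5, remaining elements are zero
/// let r = _mm_cvtph_pd(a); // r == [1.5, 0.0]
/// ```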
15945#[inline]
15946#[target_feature(enable = "avx512fp16,avx512vl")]
15947#[cfg_attr(test, assert_instr(vcvtph2pd))]
15948#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15949pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
    _mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
15951}
15952
15953/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15954/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15955/// dst when the corresponding mask bit is not set).
15956///
15957/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
15958#[inline]
15959#[target_feature(enable = "avx512fp16,avx512vl")]
15960#[cfg_attr(test, assert_instr(vcvtph2pd))]
15961#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15962pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
15963 unsafe { vcvtph2pd_128(a, src, k) }
15964}
15965
15966/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15967/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15968/// corresponding mask bit is not set).
15969///
15970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
15971#[inline]
15972#[target_feature(enable = "avx512fp16,avx512vl")]
15973#[cfg_attr(test, assert_instr(vcvtph2pd))]
15974#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15975pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
    _mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
15977}
15978
15979/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15980/// floating-point elements, and store the results in dst.
15981///
15982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
15983#[inline]
15984#[target_feature(enable = "avx512fp16,avx512vl")]
15985#[cfg_attr(test, assert_instr(vcvtph2pd))]
15986#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15987pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
    _mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
15989}
15990
15991/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
15992/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15993/// dst when the corresponding mask bit is not set).
15994///
15995/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
15996#[inline]
15997#[target_feature(enable = "avx512fp16,avx512vl")]
15998#[cfg_attr(test, assert_instr(vcvtph2pd))]
15999#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16000pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
16001 unsafe { vcvtph2pd_256(a, src, k) }
16002}
16003
16004/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16005/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16006/// corresponding mask bit is not set).
16007///
16008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
16009#[inline]
16010#[target_feature(enable = "avx512fp16,avx512vl")]
16011#[cfg_attr(test, assert_instr(vcvtph2pd))]
16012#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16013pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
    _mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
16015}
16016
16017/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16018/// floating-point elements, and store the results in dst.
16019///
16020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
16021#[inline]
16022#[target_feature(enable = "avx512fp16")]
16023#[cfg_attr(test, assert_instr(vcvtph2pd))]
16024#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16025pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
    _mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
16027}
16028
16029/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16030/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16031/// dst when the corresponding mask bit is not set).
16032///
16033/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
16034#[inline]
16035#[target_feature(enable = "avx512fp16")]
16036#[cfg_attr(test, assert_instr(vcvtph2pd))]
16037#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16038pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
16039 unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
16040}
16041
16042/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16043/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16044/// corresponding mask bit is not set).
16045///
16046/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
16047#[inline]
16048#[target_feature(enable = "avx512fp16")]
16049#[cfg_attr(test, assert_instr(vcvtph2pd))]
16050#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16051pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
    _mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
16053}
16054
16055/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16056/// floating-point elements, and store the results in dst.
16057///
16058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16059///
16060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
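///
/// A minimal sketch (hypothetical input): `SAE` only suppresses exceptions here; widening to
/// double precision is exact, so no rounding occurs.
///
/// ```ignore
/// let a = _mm_set1_ph(0.25);
/// let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a); // every f64 lane is 0.25
/// ```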
16061#[inline]
16062#[target_feature(enable = "avx512fp16")]
16063#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
16064#[rustc_legacy_const_generics(1)]
16065#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16066pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
16067 static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
16069}
16070
16071/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16072/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16073/// dst when the corresponding mask bit is not set).
16074///
16075/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16076///
16077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
16078#[inline]
16079#[target_feature(enable = "avx512fp16")]
16080#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
16081#[rustc_legacy_const_generics(3)]
16082#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16083pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
16084 src: __m512d,
16085 k: __mmask8,
16086 a: __m128h,
16087) -> __m512d {
16088 unsafe {
16089 static_assert_sae!(SAE);
16090 vcvtph2pd_512(a, src, k, SAE)
16091 }
16092}
16093
16094/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16095/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16096/// corresponding mask bit is not set).
16097///
16098/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16099///
16100/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
16101#[inline]
16102#[target_feature(enable = "avx512fp16")]
16103#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
16104#[rustc_legacy_const_generics(2)]
16105#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16106pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
16107 static_assert_sae!(SAE);
    _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
16109}
16110
16111/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16112/// floating-point element, store the result in the lower element of dst, and copy the upper element
16113/// from a to the upper element of dst.
16114///
16115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
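///
/// A minimal sketch (illustrative only, not a doctest):
///
/// ```ignore
/// let a = _mm_set_pd(7.0, 0.0); // upper lane 7.0 is carried through
/// let b = _mm_set_sh(1.5);
/// let r = _mm_cvtsh_sd(a, b); // lanes: [1.5, 7.0]
/// ```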
16116#[inline]
16117#[target_feature(enable = "avx512fp16")]
16118#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16119#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16120pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
    _mm_mask_cvtsh_sd(a, 0xff, a, b)
16122}
16123
16124/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16125/// floating-point element, store the result in the lower element of dst using writemask k (the element is
16126/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
16127/// of dst.
16128///
16129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
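///
/// A hedged sketch of the mask-bit-0 behaviour (illustrative only, not a doctest):
///
/// ```ignore
/// let src = _mm_set_sd(9.0);
/// let a = _mm_set_pd(7.0, 0.0);
/// let b = _mm_set_sh(1.5);
/// // Bit 0 of the mask is clear, so the lower lane comes from `src`.
/// let r = _mm_mask_cvtsh_sd(src, 0, a, b); // lanes: [9.0, 7.0]
/// ```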
16130#[inline]
16131#[target_feature(enable = "avx512fp16")]
16132#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16133#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16134pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16135 unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
16136}
16137
16138/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16139/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16140/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
16141///
16142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
16143#[inline]
16144#[target_feature(enable = "avx512fp16")]
16145#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16146#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16147pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
    _mm_mask_cvtsh_sd(_mm_set_sd(0.0), k, a, b)
16149}
16150
16151/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16152/// floating-point element, store the result in the lower element of dst, and copy the upper element from a
16153/// to the upper element of dst.
16154///
16155/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16156///
16157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
16158#[inline]
16159#[target_feature(enable = "avx512fp16")]
16160#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
16161#[rustc_legacy_const_generics(2)]
16162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16163pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
16164 static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
16166}
16167
16168/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16169/// floating-point element, store the result in the lower element of dst using writemask k (the element is
16170/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
16171/// of dst.
16172///
16173/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16174///
16175/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
16176#[inline]
16177#[target_feature(enable = "avx512fp16")]
16178#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
16179#[rustc_legacy_const_generics(4)]
16180#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16181pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
16182 src: __m128d,
16183 k: __mmask8,
16184 a: __m128d,
16185 b: __m128h,
16186) -> __m128d {
16187 unsafe {
16188 static_assert_sae!(SAE);
16189 vcvtsh2sd(a, b, src, k, SAE)
16190 }
16191}
16192
16193/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16194/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16195/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
16196///
16197/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16198///
16199/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
16200#[inline]
16201#[target_feature(enable = "avx512fp16")]
16202#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
16203#[rustc_legacy_const_generics(3)]
16204#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16205pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16206 static_assert_sae!(SAE);
    _mm_mask_cvt_roundsh_sd::<SAE>(_mm_set_sd(0.0), k, a, b)
16208}
16209
16210/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16211///
16212/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
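///
/// A minimal sketch (illustrative only, not a doctest):
///
/// ```ignore
/// let a = _mm_set_sh(2.5);
/// let x: f16 = _mm_cvtsh_h(a); // 2.5
/// ```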
16213#[inline]
16214#[target_feature(enable = "avx512fp16")]
16215#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16216pub fn _mm_cvtsh_h(a: __m128h) -> f16 {
16217 unsafe { simd_extract!(a, 0) }
16218}
16219
16220/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16221///
16222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
16223#[inline]
16224#[target_feature(enable = "avx512fp16")]
16225#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16226pub fn _mm256_cvtsh_h(a: __m256h) -> f16 {
16227 unsafe { simd_extract!(a, 0) }
16228}
16229
16230/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16231///
16232/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
16233#[inline]
16234#[target_feature(enable = "avx512fp16")]
16235#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16236pub fn _mm512_cvtsh_h(a: __m512h) -> f16 {
16237 unsafe { simd_extract!(a, 0) }
16238}
16239
16240/// Copy the lower 16-bit integer in a to dst.
16241///
16242/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
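///
/// A minimal sketch (illustrative only, not a doctest):
///
/// ```ignore
/// let v = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 42);
/// let x = _mm_cvtsi128_si16(v); // 42
/// ```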
16243#[inline]
16244#[target_feature(enable = "avx512fp16")]
16245#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16246pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
16247 unsafe { simd_extract!(a.as_i16x8(), 0) }
16248}
16249
16250/// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst.
16251///
16252/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
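///
/// A minimal sketch (illustrative only, not a doctest):
///
/// ```ignore
/// let v = _mm_cvtsi16_si128(7); // element 0 is 7, elements 1..=7 are zero
/// ```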
16253#[inline]
16254#[target_feature(enable = "avx512fp16")]
16255#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16256pub fn _mm_cvtsi16_si128(a: i16) -> __m128i {
    unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
16258}
16259
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
    unsafe fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
    #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
    unsafe fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;

    #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
    unsafe fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
    unsafe fn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
    unsafe fn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
    unsafe fn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
    unsafe fn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
    unsafe fn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
    unsafe fn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
    unsafe fn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
    unsafe fn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
    unsafe fn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
    unsafe fn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
    unsafe fn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
    unsafe fn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
    unsafe fn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
    unsafe fn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
    unsafe fn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
    unsafe fn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
    unsafe fn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
    unsafe fn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
    unsafe fn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
    unsafe fn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
    unsafe fn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
    unsafe fn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
    unsafe fn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
    unsafe fn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
    unsafe fn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
    unsafe fn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
    unsafe fn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
    unsafe fn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
    -> __m512;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
    unsafe fn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
    -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
    unsafe fn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
    unsafe fn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
    unsafe fn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
    unsafe fn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;

    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.128"]
    unsafe fn vfmaddsubph_128(a: __m128h, b: __m128h, c: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.256"]
    unsafe fn vfmaddsubph_256(a: __m256h, b: __m256h, c: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
    unsafe fn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
    unsafe fn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
    unsafe fn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
    unsafe fn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
    unsafe fn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
    unsafe fn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
    unsafe fn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
    unsafe fn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
    unsafe fn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
    unsafe fn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
    unsafe fn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
    unsafe fn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
    unsafe fn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
    unsafe fn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
    unsafe fn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
    unsafe fn vminph_128(a: __m128h, b: __m128h) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
    unsafe fn vminph_256(a: __m256h, b: __m256h) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
    unsafe fn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
    unsafe fn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
    unsafe fn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
    unsafe fn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
    unsafe fn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
    unsafe fn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
    unsafe fn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
    unsafe fn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
    unsafe fn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
    unsafe fn vgetmantsh(
        a: __m128h,
        b: __m128h,
        imm8: i32,
        src: __m128h,
        k: __mmask8,
        sae: i32,
    ) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
    unsafe fn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
    unsafe fn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
    unsafe fn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
    unsafe fn vrndscalesh(
        a: __m128h,
        b: __m128h,
        src: __m128h,
        k: __mmask8,
        imm8: i32,
        sae: i32,
    ) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
    unsafe fn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
    unsafe fn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
    unsafe fn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
    unsafe fn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
    unsafe fn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
    unsafe fn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
    unsafe fn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
    unsafe fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
    -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
    unsafe fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;

    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"]
    unsafe fn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"]
    unsafe fn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"]
    unsafe fn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i16"]
    unsafe fn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i16"]
    unsafe fn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32i16"]
    unsafe fn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"]
    unsafe fn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"]
    unsafe fn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"]
    unsafe fn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"]
    unsafe fn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"]
    unsafe fn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i32"]
    unsafe fn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i32"]
    unsafe fn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"]
    unsafe fn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"]
    unsafe fn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"]
    unsafe fn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"]
    unsafe fn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"]
    unsafe fn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"]
    unsafe fn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i64"]
    unsafe fn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128"]
    unsafe fn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"]
    unsafe fn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"]
    unsafe fn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"]
    unsafe fn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"]
    unsafe fn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"]
    unsafe fn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"]
    unsafe fn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
    unsafe fn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"]
    unsafe fn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"]
    unsafe fn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"]
    unsafe fn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"]
    unsafe fn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
    unsafe fn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
    unsafe fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
    unsafe fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"]
    unsafe fn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"]
    unsafe fn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"]
    unsafe fn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"]
    unsafe fn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"]
    unsafe fn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"]
    unsafe fn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"]
    unsafe fn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"]
    unsafe fn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"]
    unsafe fn vcvtsh2si32(a: __m128h, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"]
    unsafe fn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"]
    unsafe fn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"]
    unsafe fn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"]
    unsafe fn vcvtsh2usi32(a: __m128h, sae: i32) -> u32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"]
    unsafe fn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"]
    unsafe fn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"]
    unsafe fn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"]
    unsafe fn vcvttsh2si32(a: __m128h, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"]
    unsafe fn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"]
    unsafe fn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"]
    unsafe fn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"]
    unsafe fn vcvttsh2usi32(a: __m128h, sae: i32) -> u32;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"]
    unsafe fn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"]
    unsafe fn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"]
    unsafe fn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"]
    unsafe fn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"]
    unsafe fn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"]
    unsafe fn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"]
    unsafe fn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"]
    unsafe fn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"]
    unsafe fn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"]
    unsafe fn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256"]
    unsafe fn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"]
    unsafe fn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"]
    unsafe fn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"]
    unsafe fn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"]
    unsafe fn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"]
    unsafe fn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128;

    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"]
    unsafe fn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"]
    unsafe fn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"]
    unsafe fn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d;
    #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"]
    unsafe fn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d;
}
16621
16622#[cfg(test)]
16623mod tests {
16624 use crate::core_arch::x86::*;
16625 use crate::mem::transmute;
16626 use crate::ptr::{addr_of, addr_of_mut};
16627 use stdarch_test::simd_test;
16628
16629 #[target_feature(enable = "avx512fp16")]
16630 unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
16631 _mm_setr_ph(re, im, re, im, re, im, re, im)
16632 }
16633
16634 #[target_feature(enable = "avx512fp16")]
16635 unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
16636 _mm256_setr_ph(
16637 re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16638 )
16639 }
16640
16641 #[target_feature(enable = "avx512fp16")]
16642 unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
16643 _mm512_setr_ph(
16644 re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
16645 re, im, re, im, re, im, re, im, re, im,
16646 )
16647 }
16648
16649 #[simd_test(enable = "avx512fp16,avx512vl")]
16650 unsafe fn test_mm_set_ph() {
16651 let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
16652 let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
16653 assert_eq_m128h(r, e);
16654 }
16655
16656 #[simd_test(enable = "avx512fp16,avx512vl")]
16657 unsafe fn test_mm256_set_ph() {
16658 let r = _mm256_set_ph(
16659 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16660 );
16661 let e = _mm256_setr_ph(
16662 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
16663 );
16664 assert_eq_m256h(r, e);
16665 }
16666
16667 #[simd_test(enable = "avx512fp16")]
16668 unsafe fn test_mm512_set_ph() {
16669 let r = _mm512_set_ph(
16670 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16671 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
16672 31.0, 32.0,
16673 );
16674 let e = _mm512_setr_ph(
16675 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
16676 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
16677 3.0, 2.0, 1.0,
16678 );
16679 assert_eq_m512h(r, e);
16680 }
16681
16682 #[simd_test(enable = "avx512fp16,avx512vl")]
16683 unsafe fn test_mm_set_sh() {
16684 let r = _mm_set_sh(1.0);
16685 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
16686 assert_eq_m128h(r, e);
16687 }
16688
16689 #[simd_test(enable = "avx512fp16,avx512vl")]
16690 unsafe fn test_mm_set1_ph() {
16691 let r = _mm_set1_ph(1.0);
16692 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
16693 assert_eq_m128h(r, e);
16694 }
16695
16696 #[simd_test(enable = "avx512fp16,avx512vl")]
16697 unsafe fn test_mm256_set1_ph() {
16698 let r = _mm256_set1_ph(1.0);
16699 let e = _mm256_set_ph(
16700 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
16701 );
16702 assert_eq_m256h(r, e);
16703 }
16704
16705 #[simd_test(enable = "avx512fp16")]
16706 unsafe fn test_mm512_set1_ph() {
16707 let r = _mm512_set1_ph(1.0);
16708 let e = _mm512_set_ph(
16709 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
16710 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
16711 );
16712 assert_eq_m512h(r, e);
16713 }
16714
16715 #[simd_test(enable = "avx512fp16,avx512vl")]
16716 unsafe fn test_mm_setr_ph() {
16717 let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
16718 let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
16719 assert_eq_m128h(r, e);
16720 }
16721
16722 #[simd_test(enable = "avx512fp16,avx512vl")]
16723 unsafe fn test_mm256_setr_ph() {
16724 let r = _mm256_setr_ph(
16725 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16726 );
16727 let e = _mm256_set_ph(
16728 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
16729 );
16730 assert_eq_m256h(r, e);
16731 }
16732
16733 #[simd_test(enable = "avx512fp16")]
16734 unsafe fn test_mm512_setr_ph() {
16735 let r = _mm512_setr_ph(
16736 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
16737 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
16738 31.0, 32.0,
16739 );
16740 let e = _mm512_set_ph(
16741 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
16742 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
16743 3.0, 2.0, 1.0,
16744 );
16745 assert_eq_m512h(r, e);
16746 }
16747
16748 #[simd_test(enable = "avx512fp16,avx512vl")]
16749 unsafe fn test_mm_setzero_ph() {
16750 let r = _mm_setzero_ph();
16751 let e = _mm_set1_ph(0.0);
16752 assert_eq_m128h(r, e);
16753 }
16754
16755 #[simd_test(enable = "avx512fp16,avx512vl")]
16756 unsafe fn test_mm256_setzero_ph() {
16757 let r = _mm256_setzero_ph();
16758 let e = _mm256_set1_ph(0.0);
16759 assert_eq_m256h(r, e);
16760 }
16761
16762 #[simd_test(enable = "avx512fp16")]
16763 unsafe fn test_mm512_setzero_ph() {
16764 let r = _mm512_setzero_ph();
16765 let e = _mm512_set1_ph(0.0);
16766 assert_eq_m512h(r, e);
16767 }
16768
16769 #[simd_test(enable = "avx512fp16,avx512vl")]
16770 unsafe fn test_mm_castsi128_ph() {
16771 let a = _mm_set1_epi16(0x3c00);
16772 let r = _mm_castsi128_ph(a);
16773 let e = _mm_set1_ph(1.0);
16774 assert_eq_m128h(r, e);
16775 }
16776
16777 #[simd_test(enable = "avx512fp16,avx512vl")]
16778 unsafe fn test_mm256_castsi256_ph() {
16779 let a = _mm256_set1_epi16(0x3c00);
16780 let r = _mm256_castsi256_ph(a);
16781 let e = _mm256_set1_ph(1.0);
16782 assert_eq_m256h(r, e);
16783 }
16784
16785 #[simd_test(enable = "avx512fp16")]
16786 unsafe fn test_mm512_castsi512_ph() {
16787 let a = _mm512_set1_epi16(0x3c00);
16788 let r = _mm512_castsi512_ph(a);
16789 let e = _mm512_set1_ph(1.0);
16790 assert_eq_m512h(r, e);
16791 }
16792
16793 #[simd_test(enable = "avx512fp16")]
16794 unsafe fn test_mm_castph_si128() {
16795 let a = _mm_set1_ph(1.0);
16796 let r = _mm_castph_si128(a);
16797 let e = _mm_set1_epi16(0x3c00);
16798 assert_eq_m128i(r, e);
16799 }
16800
16801 #[simd_test(enable = "avx512fp16")]
16802 unsafe fn test_mm256_castph_si256() {
16803 let a = _mm256_set1_ph(1.0);
16804 let r = _mm256_castph_si256(a);
16805 let e = _mm256_set1_epi16(0x3c00);
16806 assert_eq_m256i(r, e);
16807 }
16808
16809 #[simd_test(enable = "avx512fp16")]
16810 unsafe fn test_mm512_castph_si512() {
16811 let a = _mm512_set1_ph(1.0);
16812 let r = _mm512_castph_si512(a);
16813 let e = _mm512_set1_epi16(0x3c00);
16814 assert_eq_m512i(r, e);
16815 }
16816
16817 #[simd_test(enable = "avx512fp16,avx512vl")]
16818 unsafe fn test_mm_castps_ph() {
16819 let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
16820 let r = _mm_castps_ph(a);
16821 let e = _mm_set1_ph(1.0);
16822 assert_eq_m128h(r, e);
16823 }
16824
16825 #[simd_test(enable = "avx512fp16,avx512vl")]
16826 unsafe fn test_mm256_castps_ph() {
16827 let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
16828 let r = _mm256_castps_ph(a);
16829 let e = _mm256_set1_ph(1.0);
16830 assert_eq_m256h(r, e);
16831 }
16832
16833 #[simd_test(enable = "avx512fp16")]
16834 unsafe fn test_mm512_castps_ph() {
16835 let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
16836 let r = _mm512_castps_ph(a);
16837 let e = _mm512_set1_ph(1.0);
16838 assert_eq_m512h(r, e);
16839 }
16840
16841 #[simd_test(enable = "avx512fp16")]
16842 unsafe fn test_mm_castph_ps() {
16843 let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
16844 let r = _mm_castph_ps(a);
16845 let e = _mm_set1_ps(1.0);
16846 assert_eq_m128(r, e);
16847 }
16848
16849 #[simd_test(enable = "avx512fp16")]
16850 unsafe fn test_mm256_castph_ps() {
16851 let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
16852 let r = _mm256_castph_ps(a);
16853 let e = _mm256_set1_ps(1.0);
16854 assert_eq_m256(r, e);
16855 }
16856
16857 #[simd_test(enable = "avx512fp16")]
16858 unsafe fn test_mm512_castph_ps() {
16859 let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
16860 let r = _mm512_castph_ps(a);
16861 let e = _mm512_set1_ps(1.0);
16862 assert_eq_m512(r, e);
16863 }
16864
16865 #[simd_test(enable = "avx512fp16,avx512vl")]
16866 unsafe fn test_mm_castpd_ph() {
16867 let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
16868 let r = _mm_castpd_ph(a);
16869 let e = _mm_set1_ph(1.0);
16870 assert_eq_m128h(r, e);
16871 }
16872
16873 #[simd_test(enable = "avx512fp16,avx512vl")]
16874 unsafe fn test_mm256_castpd_ph() {
16875 let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
16876 let r = _mm256_castpd_ph(a);
16877 let e = _mm256_set1_ph(1.0);
16878 assert_eq_m256h(r, e);
16879 }
16880
16881 #[simd_test(enable = "avx512fp16")]
16882 unsafe fn test_mm512_castpd_ph() {
16883 let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
16884 let r = _mm512_castpd_ph(a);
16885 let e = _mm512_set1_ph(1.0);
16886 assert_eq_m512h(r, e);
16887 }
16888
16889 #[simd_test(enable = "avx512fp16")]
16890 unsafe fn test_mm_castph_pd() {
16891 let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
16892 let r = _mm_castph_pd(a);
16893 let e = _mm_set1_pd(1.0);
16894 assert_eq_m128d(r, e);
16895 }
16896
16897 #[simd_test(enable = "avx512fp16")]
16898 unsafe fn test_mm256_castph_pd() {
16899 let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
16900 let r = _mm256_castph_pd(a);
16901 let e = _mm256_set1_pd(1.0);
16902 assert_eq_m256d(r, e);
16903 }
16904
16905 #[simd_test(enable = "avx512fp16")]
16906 unsafe fn test_mm512_castph_pd() {
16907 let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
16908 let r = _mm512_castph_pd(a);
16909 let e = _mm512_set1_pd(1.0);
16910 assert_eq_m512d(r, e);
16911 }
16912
16913 #[simd_test(enable = "avx512fp16,avx512vl")]
16914 unsafe fn test_mm256_castph256_ph128() {
16915 let a = _mm256_setr_ph(
16916 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16917 );
16918 let r = _mm256_castph256_ph128(a);
16919 let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16920 assert_eq_m128h(r, e);
16921 }
16922
16923 #[simd_test(enable = "avx512fp16,avx512vl")]
16924 unsafe fn test_mm512_castph512_ph128() {
16925 let a = _mm512_setr_ph(
16926 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
16927 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
16928 );
16929 let r = _mm512_castph512_ph128(a);
16930 let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16931 assert_eq_m128h(r, e);
16932 }
16933
16934 #[simd_test(enable = "avx512fp16,avx512vl")]
16935 unsafe fn test_mm512_castph512_ph256() {
16936 let a = _mm512_setr_ph(
16937 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
16938 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
16939 );
16940 let r = _mm512_castph512_ph256(a);
16941 let e = _mm256_setr_ph(
16942 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16943 );
16944 assert_eq_m256h(r, e);
16945 }
16946
16947 #[simd_test(enable = "avx512fp16,avx512vl")]
16948 unsafe fn test_mm256_castph128_ph256() {
16949 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16950 let r = _mm256_castph128_ph256(a);
16951 assert_eq_m128h(_mm256_castph256_ph128(r), a);
16952 }
16953
16954 #[simd_test(enable = "avx512fp16,avx512vl")]
16955 unsafe fn test_mm512_castph128_ph512() {
16956 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16957 let r = _mm512_castph128_ph512(a);
16958 assert_eq_m128h(_mm512_castph512_ph128(r), a);
16959 }
16960
16961 #[simd_test(enable = "avx512fp16,avx512vl")]
16962 unsafe fn test_mm512_castph256_ph512() {
16963 let a = _mm256_setr_ph(
16964 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16965 );
16966 let r = _mm512_castph256_ph512(a);
16967 assert_eq_m256h(_mm512_castph512_ph256(r), a);
16968 }
16969
16970 #[simd_test(enable = "avx512fp16,avx512vl")]
16971 unsafe fn test_mm256_zextph128_ph256() {
16972 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16973 let r = _mm256_zextph128_ph256(a);
16974 let e = _mm256_setr_ph(
16975 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
16976 );
16977 assert_eq_m256h(r, e);
16978 }
16979
16980 #[simd_test(enable = "avx512fp16")]
16981 unsafe fn test_mm512_zextph128_ph512() {
16982 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
16983 let r = _mm512_zextph128_ph512(a);
16984 let e = _mm512_setr_ph(
16985 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
16986 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
16987 );
16988 assert_eq_m512h(r, e);
16989 }
16990
16991 #[simd_test(enable = "avx512fp16")]
16992 unsafe fn test_mm512_zextph256_ph512() {
16993 let a = _mm256_setr_ph(
16994 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16995 );
16996 let r = _mm512_zextph256_ph512(a);
16997 let e = _mm512_setr_ph(
16998 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
16999 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
17000 );
17001 assert_eq_m512h(r, e);
17002 }
17003
17004 #[simd_test(enable = "avx512fp16,avx512vl")]
17005 unsafe fn test_mm_cmp_ph_mask() {
17006 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17007 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17008 let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17009 assert_eq!(r, 0b11110000);
17010 }
17011
17012 #[simd_test(enable = "avx512fp16,avx512vl")]
17013 unsafe fn test_mm_mask_cmp_ph_mask() {
17014 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17015 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17016 let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
17017 assert_eq!(r, 0b01010000);
17018 }
17019
17020 #[simd_test(enable = "avx512fp16,avx512vl")]
17021 unsafe fn test_mm256_cmp_ph_mask() {
17022 let a = _mm256_set_ph(
17023 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17024 );
17025 let b = _mm256_set_ph(
17026 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17027 -16.0,
17028 );
17029 let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17030 assert_eq!(r, 0b1111000011110000);
17031 }
17032
17033 #[simd_test(enable = "avx512fp16,avx512vl")]
17034 unsafe fn test_mm256_mask_cmp_ph_mask() {
17035 let a = _mm256_set_ph(
17036 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17037 );
17038 let b = _mm256_set_ph(
17039 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17040 -16.0,
17041 );
17042 let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
17043 assert_eq!(r, 0b0101000001010000);
17044 }
17045
17046 #[simd_test(enable = "avx512fp16")]
17047 unsafe fn test_mm512_cmp_ph_mask() {
17048 let a = _mm512_set_ph(
17049 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17050 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17051 31.0, 32.0,
17052 );
17053 let b = _mm512_set_ph(
17054 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17055 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17056 -29.0, -30.0, -31.0, -32.0,
17057 );
17058 let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17059 assert_eq!(r, 0b11110000111100001111000011110000);
17060 }
17061
17062 #[simd_test(enable = "avx512fp16")]
17063 unsafe fn test_mm512_mask_cmp_ph_mask() {
17064 let a = _mm512_set_ph(
17065 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17066 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17067 31.0, 32.0,
17068 );
17069 let b = _mm512_set_ph(
17070 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17071 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17072 -29.0, -30.0, -31.0, -32.0,
17073 );
17074 let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
17075 assert_eq!(r, 0b01010000010100000101000001010000);
17076 }
17077
17078 #[simd_test(enable = "avx512fp16")]
17079 unsafe fn test_mm512_cmp_round_ph_mask() {
17080 let a = _mm512_set_ph(
17081 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17082 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17083 31.0, 32.0,
17084 );
17085 let b = _mm512_set_ph(
17086 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17087 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17088 -29.0, -30.0, -31.0, -32.0,
17089 );
17090 let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17091 assert_eq!(r, 0b11110000111100001111000011110000);
17092 }
17093
17094 #[simd_test(enable = "avx512fp16")]
17095 unsafe fn test_mm512_mask_cmp_round_ph_mask() {
17096 let a = _mm512_set_ph(
17097 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17098 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17099 31.0, 32.0,
17100 );
17101 let b = _mm512_set_ph(
17102 1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
17103 -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
17104 -29.0, -30.0, -31.0, -32.0,
17105 );
17106 let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(
17107 0b01010101010101010101010101010101,
17108 a,
17109 b,
17110 );
17111 assert_eq!(r, 0b01010000010100000101000001010000);
17112 }
17113
17114 #[simd_test(enable = "avx512fp16")]
17115 unsafe fn test_mm_cmp_round_sh_mask() {
17116 let a = _mm_set_sh(1.0);
17117 let b = _mm_set_sh(1.0);
17118 let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17119 assert_eq!(r, 1);
17120 }
17121
17122 #[simd_test(enable = "avx512fp16")]
17123 unsafe fn test_mm_mask_cmp_round_sh_mask() {
17124 let a = _mm_set_sh(1.0);
17125 let b = _mm_set_sh(1.0);
17126 let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
17127 assert_eq!(r, 0);
17128 }
17129
17130 #[simd_test(enable = "avx512fp16")]
17131 unsafe fn test_mm_cmp_sh_mask() {
17132 let a = _mm_set_sh(1.0);
17133 let b = _mm_set_sh(1.0);
17134 let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
17135 assert_eq!(r, 1);
17136 }
17137
17138 #[simd_test(enable = "avx512fp16")]
17139 unsafe fn test_mm_mask_cmp_sh_mask() {
17140 let a = _mm_set_sh(1.0);
17141 let b = _mm_set_sh(1.0);
17142 let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
17143 assert_eq!(r, 0);
17144 }
17145
17146 #[simd_test(enable = "avx512fp16")]
17147 unsafe fn test_mm_comi_round_sh() {
17148 let a = _mm_set_sh(1.0);
17149 let b = _mm_set_sh(1.0);
17150 let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17151 assert_eq!(r, 1);
17152 }
17153
17154 #[simd_test(enable = "avx512fp16")]
17155 unsafe fn test_mm_comi_sh() {
17156 let a = _mm_set_sh(1.0);
17157 let b = _mm_set_sh(1.0);
17158 let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
17159 assert_eq!(r, 1);
17160 }
17161
17162 #[simd_test(enable = "avx512fp16")]
17163 unsafe fn test_mm_comieq_sh() {
17164 let a = _mm_set_sh(1.0);
17165 let b = _mm_set_sh(1.0);
17166 let r = _mm_comieq_sh(a, b);
17167 assert_eq!(r, 1);
17168 }
17169
17170 #[simd_test(enable = "avx512fp16")]
17171 unsafe fn test_mm_comige_sh() {
17172 let a = _mm_set_sh(2.0);
17173 let b = _mm_set_sh(1.0);
17174 let r = _mm_comige_sh(a, b);
17175 assert_eq!(r, 1);
17176 }
17177
17178 #[simd_test(enable = "avx512fp16")]
17179 unsafe fn test_mm_comigt_sh() {
17180 let a = _mm_set_sh(2.0);
17181 let b = _mm_set_sh(1.0);
17182 let r = _mm_comigt_sh(a, b);
17183 assert_eq!(r, 1);
17184 }
17185
17186 #[simd_test(enable = "avx512fp16")]
17187 unsafe fn test_mm_comile_sh() {
17188 let a = _mm_set_sh(1.0);
17189 let b = _mm_set_sh(2.0);
17190 let r = _mm_comile_sh(a, b);
17191 assert_eq!(r, 1);
17192 }
17193
17194 #[simd_test(enable = "avx512fp16")]
17195 unsafe fn test_mm_comilt_sh() {
17196 let a = _mm_set_sh(1.0);
17197 let b = _mm_set_sh(2.0);
17198 let r = _mm_comilt_sh(a, b);
17199 assert_eq!(r, 1);
17200 }
17201
17202 #[simd_test(enable = "avx512fp16")]
17203 unsafe fn test_mm_comineq_sh() {
17204 let a = _mm_set_sh(1.0);
17205 let b = _mm_set_sh(2.0);
17206 let r = _mm_comineq_sh(a, b);
17207 assert_eq!(r, 1);
17208 }
17209
17210 #[simd_test(enable = "avx512fp16")]
17211 unsafe fn test_mm_ucomieq_sh() {
17212 let a = _mm_set_sh(1.0);
17213 let b = _mm_set_sh(1.0);
17214 let r = _mm_ucomieq_sh(a, b);
17215 assert_eq!(r, 1);
17216 }
17217
17218 #[simd_test(enable = "avx512fp16")]
17219 unsafe fn test_mm_ucomige_sh() {
17220 let a = _mm_set_sh(2.0);
17221 let b = _mm_set_sh(1.0);
17222 let r = _mm_ucomige_sh(a, b);
17223 assert_eq!(r, 1);
17224 }
17225
17226 #[simd_test(enable = "avx512fp16")]
17227 unsafe fn test_mm_ucomigt_sh() {
17228 let a = _mm_set_sh(2.0);
17229 let b = _mm_set_sh(1.0);
17230 let r = _mm_ucomigt_sh(a, b);
17231 assert_eq!(r, 1);
17232 }
17233
17234 #[simd_test(enable = "avx512fp16")]
17235 unsafe fn test_mm_ucomile_sh() {
17236 let a = _mm_set_sh(1.0);
17237 let b = _mm_set_sh(2.0);
17238 let r = _mm_ucomile_sh(a, b);
17239 assert_eq!(r, 1);
17240 }
17241
17242 #[simd_test(enable = "avx512fp16")]
17243 unsafe fn test_mm_ucomilt_sh() {
17244 let a = _mm_set_sh(1.0);
17245 let b = _mm_set_sh(2.0);
17246 let r = _mm_ucomilt_sh(a, b);
17247 assert_eq!(r, 1);
17248 }
17249
17250 #[simd_test(enable = "avx512fp16")]
17251 unsafe fn test_mm_ucomineq_sh() {
17252 let a = _mm_set_sh(1.0);
17253 let b = _mm_set_sh(2.0);
17254 let r = _mm_ucomineq_sh(a, b);
17255 assert_eq!(r, 1);
17256 }
17257
17258 #[simd_test(enable = "avx512fp16,avx512vl")]
17259 unsafe fn test_mm_load_ph() {
17260 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17261 let b = _mm_load_ph(addr_of!(a).cast());
17262 assert_eq_m128h(a, b);
17263 }
17264
17265 #[simd_test(enable = "avx512fp16,avx512vl")]
17266 unsafe fn test_mm256_load_ph() {
17267 let a = _mm256_set_ph(
17268 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17269 );
17270 let b = _mm256_load_ph(addr_of!(a).cast());
17271 assert_eq_m256h(a, b);
17272 }
17273
17274 #[simd_test(enable = "avx512fp16")]
17275 unsafe fn test_mm512_load_ph() {
17276 let a = _mm512_set_ph(
17277 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17278 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17279 31.0, 32.0,
17280 );
17281 let b = _mm512_load_ph(addr_of!(a).cast());
17282 assert_eq_m512h(a, b);
17283 }
17284
17285 #[simd_test(enable = "avx512fp16")]
17286 unsafe fn test_mm_load_sh() {
17287 let a = _mm_set_sh(1.0);
17288 let b = _mm_load_sh(addr_of!(a).cast());
17289 assert_eq_m128h(a, b);
17290 }
17291
17292 #[simd_test(enable = "avx512fp16")]
17293 unsafe fn test_mm_mask_load_sh() {
17294 let a = _mm_set_sh(1.0);
17295 let src = _mm_set_sh(2.);
17296 let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
17297 assert_eq_m128h(a, b);
17298 let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
17299 assert_eq_m128h(src, b);
17300 }
17301
17302 #[simd_test(enable = "avx512fp16")]
17303 unsafe fn test_mm_maskz_load_sh() {
17304 let a = _mm_set_sh(1.0);
17305 let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
17306 assert_eq_m128h(a, b);
17307 let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
17308 assert_eq_m128h(_mm_setzero_ph(), b);
17309 }
17310
17311 #[simd_test(enable = "avx512fp16,avx512vl")]
17312 unsafe fn test_mm_loadu_ph() {
17313 let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
17314 let r = _mm_loadu_ph(array.as_ptr());
17315 let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17316 assert_eq_m128h(r, e);
17317 }
17318
17319 #[simd_test(enable = "avx512fp16,avx512vl")]
17320 unsafe fn test_mm256_loadu_ph() {
17321 let array = [
17322 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17323 ];
17324 let r = _mm256_loadu_ph(array.as_ptr());
17325 let e = _mm256_setr_ph(
17326 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17327 );
17328 assert_eq_m256h(r, e);
17329 }
17330
17331 #[simd_test(enable = "avx512fp16")]
17332 unsafe fn test_mm512_loadu_ph() {
17333 let array = [
17334 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17335 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17336 31.0, 32.0,
17337 ];
17338 let r = _mm512_loadu_ph(array.as_ptr());
17339 let e = _mm512_setr_ph(
17340 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17341 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17342 31.0, 32.0,
17343 );
17344 assert_eq_m512h(r, e);
17345 }
17346
17347 #[simd_test(enable = "avx512fp16")]
17348 unsafe fn test_mm_move_sh() {
17349 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17350 let b = _mm_set_sh(9.0);
17351 let r = _mm_move_sh(a, b);
17352 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
17353 assert_eq_m128h(r, e);
17354 }
17355
17356 #[simd_test(enable = "avx512fp16")]
17357 unsafe fn test_mm_mask_move_sh() {
17358 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17359 let b = _mm_set_sh(9.0);
17360 let src = _mm_set_sh(10.0);
17361 let r = _mm_mask_move_sh(src, 0, a, b);
17362 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
17363 assert_eq_m128h(r, e);
17364 }
17365
17366 #[simd_test(enable = "avx512fp16")]
17367 unsafe fn test_mm_maskz_move_sh() {
17368 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17369 let b = _mm_set_sh(9.0);
17370 let r = _mm_maskz_move_sh(0, a, b);
17371 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
17372 assert_eq_m128h(r, e);
17373 }
17374
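    // Aligned stores mirror the loads: the destination of the `_store_` intrinsics must be
    // aligned to the vector width, which `addr_of_mut!` of a vector value guarantees, while
    // `_storeu_` writes to an arbitrary f16 buffer.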
17375 #[simd_test(enable = "avx512fp16,avx512vl")]
17376 unsafe fn test_mm_store_ph() {
17377 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17378 let mut b = _mm_setzero_ph();
17379 _mm_store_ph(addr_of_mut!(b).cast(), a);
17380 assert_eq_m128h(a, b);
17381 }
17382
17383 #[simd_test(enable = "avx512fp16,avx512vl")]
17384 unsafe fn test_mm256_store_ph() {
17385 let a = _mm256_set_ph(
17386 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17387 );
17388 let mut b = _mm256_setzero_ph();
17389 _mm256_store_ph(addr_of_mut!(b).cast(), a);
17390 assert_eq_m256h(a, b);
17391 }
17392
17393 #[simd_test(enable = "avx512fp16")]
17394 unsafe fn test_mm512_store_ph() {
17395 let a = _mm512_set_ph(
17396 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17397 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17398 31.0, 32.0,
17399 );
17400 let mut b = _mm512_setzero_ph();
17401 _mm512_store_ph(addr_of_mut!(b).cast(), a);
17402 assert_eq_m512h(a, b);
17403 }
17404
17405 #[simd_test(enable = "avx512fp16")]
17406 unsafe fn test_mm_store_sh() {
17407 let a = _mm_set_sh(1.0);
17408 let mut b = _mm_setzero_ph();
17409 _mm_store_sh(addr_of_mut!(b).cast(), a);
17410 assert_eq_m128h(a, b);
17411 }
17412
17413 #[simd_test(enable = "avx512fp16")]
17414 unsafe fn test_mm_mask_store_sh() {
17415 let a = _mm_set_sh(1.0);
17416 let mut b = _mm_setzero_ph();
17417 _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
17418 assert_eq_m128h(_mm_setzero_ph(), b);
17419 _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
17420 assert_eq_m128h(a, b);
17421 }
17422
17423 #[simd_test(enable = "avx512fp16,avx512vl")]
17424 unsafe fn test_mm_storeu_ph() {
17425 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17426 let mut array = [0.0; 8];
17427 _mm_storeu_ph(array.as_mut_ptr(), a);
17428 assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
17429 }
17430
17431 #[simd_test(enable = "avx512fp16,avx512vl")]
17432 unsafe fn test_mm256_storeu_ph() {
17433 let a = _mm256_set_ph(
17434 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17435 );
17436 let mut array = [0.0; 16];
17437 _mm256_storeu_ph(array.as_mut_ptr(), a);
17438 assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
17439 }
17440
17441 #[simd_test(enable = "avx512fp16")]
17442 unsafe fn test_mm512_storeu_ph() {
17443 let a = _mm512_set_ph(
17444 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17445 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17446 31.0, 32.0,
17447 );
17448 let mut array = [0.0; 32];
17449 _mm512_storeu_ph(array.as_mut_ptr(), a);
17450 assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
17451 }
17452
17453 #[simd_test(enable = "avx512fp16,avx512vl")]
17454 unsafe fn test_mm_add_ph() {
17455 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17456 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17457 let r = _mm_add_ph(a, b);
17458 let e = _mm_set1_ph(9.0);
17459 assert_eq_m128h(r, e);
17460 }
17461
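    // Mask conventions used throughout these tests: `_mm_set_ph` lists elements from the
    // highest (e7) down to the lowest (e0), so mask bit i corresponds to the i-th argument
    // counted from the right. Lanes with a clear bit keep `src` in the `_mask_` variants
    // and become zero in the `_maskz_` variants.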
17462 #[simd_test(enable = "avx512fp16,avx512vl")]
17463 unsafe fn test_mm_mask_add_ph() {
17464 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17465 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17466 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17467 let r = _mm_mask_add_ph(src, 0b01010101, a, b);
17468 let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
17469 assert_eq_m128h(r, e);
17470 }
17471
17472 #[simd_test(enable = "avx512fp16,avx512vl")]
17473 unsafe fn test_mm_maskz_add_ph() {
17474 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17475 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17476 let r = _mm_maskz_add_ph(0b01010101, a, b);
17477 let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
17478 assert_eq_m128h(r, e);
17479 }
17480
17481 #[simd_test(enable = "avx512fp16,avx512vl")]
17482 unsafe fn test_mm256_add_ph() {
17483 let a = _mm256_set_ph(
17484 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17485 );
17486 let b = _mm256_set_ph(
17487 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17488 );
17489 let r = _mm256_add_ph(a, b);
17490 let e = _mm256_set1_ph(17.0);
17491 assert_eq_m256h(r, e);
17492 }
17493
17494 #[simd_test(enable = "avx512fp16,avx512vl")]
17495 unsafe fn test_mm256_mask_add_ph() {
17496 let a = _mm256_set_ph(
17497 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17498 );
17499 let b = _mm256_set_ph(
17500 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17501 );
17502 let src = _mm256_set_ph(
17503 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
17504 );
17505 let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
17506 let e = _mm256_set_ph(
17507 18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
17508 );
17509 assert_eq_m256h(r, e);
17510 }
17511
17512 #[simd_test(enable = "avx512fp16,avx512vl")]
17513 unsafe fn test_mm256_maskz_add_ph() {
17514 let a = _mm256_set_ph(
17515 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17516 );
17517 let b = _mm256_set_ph(
17518 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17519 );
17520 let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
17521 let e = _mm256_set_ph(
17522 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
17523 );
17524 assert_eq_m256h(r, e);
17525 }
17526
17527 #[simd_test(enable = "avx512fp16")]
17528 unsafe fn test_mm512_add_ph() {
17529 let a = _mm512_set_ph(
17530 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17531 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17532 31.0, 32.0,
17533 );
17534 let b = _mm512_set_ph(
17535 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17536 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17537 3.0, 2.0, 1.0,
17538 );
17539 let r = _mm512_add_ph(a, b);
17540 let e = _mm512_set1_ph(33.0);
17541 assert_eq_m512h(r, e);
17542 }
17543
17544 #[simd_test(enable = "avx512fp16")]
17545 unsafe fn test_mm512_mask_add_ph() {
17546 let a = _mm512_set_ph(
17547 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17548 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17549 31.0, 32.0,
17550 );
17551 let b = _mm512_set_ph(
17552 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17553 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17554 3.0, 2.0, 1.0,
17555 );
17556 let src = _mm512_set_ph(
17557 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17558 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17559 );
17560 let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
17561 let e = _mm512_set_ph(
17562 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
17563 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
17564 );
17565 assert_eq_m512h(r, e);
17566 }
17567
17568 #[simd_test(enable = "avx512fp16")]
17569 unsafe fn test_mm512_maskz_add_ph() {
17570 let a = _mm512_set_ph(
17571 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17572 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17573 31.0, 32.0,
17574 );
17575 let b = _mm512_set_ph(
17576 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17577 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17578 3.0, 2.0, 1.0,
17579 );
17580 let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
17581 let e = _mm512_set_ph(
17582 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
17583 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
17584 );
17585 assert_eq_m512h(r, e);
17586 }
17587
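    // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` requests round-to-nearest-even with
    // exceptions suppressed, i.e. the default rounding behavior, so the `_round_` results
    // match the plain variants above.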
17588 #[simd_test(enable = "avx512fp16")]
17589 unsafe fn test_mm512_add_round_ph() {
17590 let a = _mm512_set_ph(
17591 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17592 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17593 31.0, 32.0,
17594 );
17595 let b = _mm512_set_ph(
17596 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17597 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17598 3.0, 2.0, 1.0,
17599 );
17600 let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17601 let e = _mm512_set1_ph(33.0);
17602 assert_eq_m512h(r, e);
17603 }
17604
17605 #[simd_test(enable = "avx512fp16")]
17606 unsafe fn test_mm512_mask_add_round_ph() {
17607 let a = _mm512_set_ph(
17608 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17609 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17610 31.0, 32.0,
17611 );
17612 let b = _mm512_set_ph(
17613 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17614 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17615 3.0, 2.0, 1.0,
17616 );
17617 let src = _mm512_set_ph(
17618 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17619 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17620 );
17621 let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17622 src,
17623 0b01010101010101010101010101010101,
17624 a,
17625 b,
17626 );
17627 let e = _mm512_set_ph(
17628 34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
17629 33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
17630 );
17631 assert_eq_m512h(r, e);
17632 }
17633
17634 #[simd_test(enable = "avx512fp16")]
17635 unsafe fn test_mm512_maskz_add_round_ph() {
17636 let a = _mm512_set_ph(
17637 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17638 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17639 31.0, 32.0,
17640 );
17641 let b = _mm512_set_ph(
17642 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17643 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17644 3.0, 2.0, 1.0,
17645 );
17646 let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17647 0b01010101010101010101010101010101,
17648 a,
17649 b,
17650 );
17651 let e = _mm512_set_ph(
17652 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
17653 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
17654 );
17655 assert_eq_m512h(r, e);
17656 }
17657
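    // Scalar `_sh` arithmetic operates on element 0 only; the upper seven elements of the
    // result are copied from `a`. With masking, a clear bit 0 takes element 0 from `src`
    // (or zero for `maskz`).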
17658 #[simd_test(enable = "avx512fp16")]
17659 unsafe fn test_mm_add_round_sh() {
17660 let a = _mm_set_sh(1.0);
17661 let b = _mm_set_sh(2.0);
17662 let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17663 let e = _mm_set_sh(3.0);
17664 assert_eq_m128h(r, e);
17665 }
17666
17667 #[simd_test(enable = "avx512fp16")]
17668 unsafe fn test_mm_mask_add_round_sh() {
17669 let a = _mm_set_sh(1.0);
17670 let b = _mm_set_sh(2.0);
17671 let src = _mm_set_sh(4.0);
17672 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17673 src, 0, a, b,
17674 );
17675 let e = _mm_set_sh(4.0);
17676 assert_eq_m128h(r, e);
17677 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17678 src, 1, a, b,
17679 );
17680 let e = _mm_set_sh(3.0);
17681 assert_eq_m128h(r, e);
17682 }
17683
17684 #[simd_test(enable = "avx512fp16")]
17685 unsafe fn test_mm_maskz_add_round_sh() {
17686 let a = _mm_set_sh(1.0);
17687 let b = _mm_set_sh(2.0);
17688 let r =
17689 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
17690 let e = _mm_set_sh(0.0);
17691 assert_eq_m128h(r, e);
17692 let r =
17693 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
17694 let e = _mm_set_sh(3.0);
17695 assert_eq_m128h(r, e);
17696 }
17697
17698 #[simd_test(enable = "avx512fp16")]
17699 unsafe fn test_mm_add_sh() {
17700 let a = _mm_set_sh(1.0);
17701 let b = _mm_set_sh(2.0);
17702 let r = _mm_add_sh(a, b);
17703 let e = _mm_set_sh(3.0);
17704 assert_eq_m128h(r, e);
17705 }
17706
17707 #[simd_test(enable = "avx512fp16")]
17708 unsafe fn test_mm_mask_add_sh() {
17709 let a = _mm_set_sh(1.0);
17710 let b = _mm_set_sh(2.0);
17711 let src = _mm_set_sh(4.0);
17712 let r = _mm_mask_add_sh(src, 0, a, b);
17713 let e = _mm_set_sh(4.0);
17714 assert_eq_m128h(r, e);
17715 let r = _mm_mask_add_sh(src, 1, a, b);
17716 let e = _mm_set_sh(3.0);
17717 assert_eq_m128h(r, e);
17718 }
17719
17720 #[simd_test(enable = "avx512fp16")]
17721 unsafe fn test_mm_maskz_add_sh() {
17722 let a = _mm_set_sh(1.0);
17723 let b = _mm_set_sh(2.0);
17724 let r = _mm_maskz_add_sh(0, a, b);
17725 let e = _mm_set_sh(0.0);
17726 assert_eq_m128h(r, e);
17727 let r = _mm_maskz_add_sh(1, a, b);
17728 let e = _mm_set_sh(3.0);
17729 assert_eq_m128h(r, e);
17730 }
17731
17732 #[simd_test(enable = "avx512fp16,avx512vl")]
17733 unsafe fn test_mm_sub_ph() {
17734 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17735 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17736 let r = _mm_sub_ph(a, b);
17737 let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
17738 assert_eq_m128h(r, e);
17739 }
17740
17741 #[simd_test(enable = "avx512fp16,avx512vl")]
17742 unsafe fn test_mm_mask_sub_ph() {
17743 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17744 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17745 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17746 let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
17747 let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
17748 assert_eq_m128h(r, e);
17749 }
17750
17751 #[simd_test(enable = "avx512fp16,avx512vl")]
17752 unsafe fn test_mm_maskz_sub_ph() {
17753 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17754 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17755 let r = _mm_maskz_sub_ph(0b01010101, a, b);
17756 let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
17757 assert_eq_m128h(r, e);
17758 }
17759
17760 #[simd_test(enable = "avx512fp16,avx512vl")]
17761 unsafe fn test_mm256_sub_ph() {
17762 let a = _mm256_set_ph(
17763 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17764 );
17765 let b = _mm256_set_ph(
17766 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17767 );
17768 let r = _mm256_sub_ph(a, b);
17769 let e = _mm256_set_ph(
17770 -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
17771 15.0,
17772 );
17773 assert_eq_m256h(r, e);
17774 }
17775
17776 #[simd_test(enable = "avx512fp16,avx512vl")]
17777 unsafe fn test_mm256_mask_sub_ph() {
17778 let a = _mm256_set_ph(
17779 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17780 );
17781 let b = _mm256_set_ph(
17782 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17783 );
17784 let src = _mm256_set_ph(
17785 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
17786 );
17787 let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
17788 let e = _mm256_set_ph(
17789 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
17790 );
17791 assert_eq_m256h(r, e);
17792 }
17793
17794 #[simd_test(enable = "avx512fp16,avx512vl")]
17795 unsafe fn test_mm256_maskz_sub_ph() {
17796 let a = _mm256_set_ph(
17797 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17798 );
17799 let b = _mm256_set_ph(
17800 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17801 );
17802 let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
17803 let e = _mm256_set_ph(
17804 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
17805 );
17806 assert_eq_m256h(r, e);
17807 }
17808
17809 #[simd_test(enable = "avx512fp16")]
17810 unsafe fn test_mm512_sub_ph() {
17811 let a = _mm512_set_ph(
17812 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17813 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17814 31.0, 32.0,
17815 );
17816 let b = _mm512_set_ph(
17817 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17818 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17819 3.0, 2.0, 1.0,
17820 );
17821 let r = _mm512_sub_ph(a, b);
17822 let e = _mm512_set_ph(
17823 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
17824 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
17825 23.0, 25.0, 27.0, 29.0, 31.0,
17826 );
17827 assert_eq_m512h(r, e);
17828 }
17829
17830 #[simd_test(enable = "avx512fp16")]
17831 unsafe fn test_mm512_mask_sub_ph() {
17832 let a = _mm512_set_ph(
17833 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17834 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17835 31.0, 32.0,
17836 );
17837 let b = _mm512_set_ph(
17838 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17839 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17840 3.0, 2.0, 1.0,
17841 );
17842 let src = _mm512_set_ph(
17843 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17844 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17845 );
17846 let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
17847 let e = _mm512_set_ph(
17848 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
17849 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
17850 );
17851 assert_eq_m512h(r, e);
17852 }
17853
17854 #[simd_test(enable = "avx512fp16")]
17855 unsafe fn test_mm512_maskz_sub_ph() {
17856 let a = _mm512_set_ph(
17857 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17858 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17859 31.0, 32.0,
17860 );
17861 let b = _mm512_set_ph(
17862 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17863 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17864 3.0, 2.0, 1.0,
17865 );
17866 let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
17867 let e = _mm512_set_ph(
17868 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
17869 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
17870 );
17871 assert_eq_m512h(r, e);
17872 }
17873
17874 #[simd_test(enable = "avx512fp16")]
17875 unsafe fn test_mm512_sub_round_ph() {
17876 let a = _mm512_set_ph(
17877 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17878 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17879 31.0, 32.0,
17880 );
17881 let b = _mm512_set_ph(
17882 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17883 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17884 3.0, 2.0, 1.0,
17885 );
17886 let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17887 let e = _mm512_set_ph(
17888 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
17889 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
17890 23.0, 25.0, 27.0, 29.0, 31.0,
17891 );
17892 assert_eq_m512h(r, e);
17893 }
17894
17895 #[simd_test(enable = "avx512fp16")]
17896 unsafe fn test_mm512_mask_sub_round_ph() {
17897 let a = _mm512_set_ph(
17898 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17899 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17900 31.0, 32.0,
17901 );
17902 let b = _mm512_set_ph(
17903 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17904 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17905 3.0, 2.0, 1.0,
17906 );
17907 let src = _mm512_set_ph(
17908 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17909 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17910 );
17911 let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17912 src,
17913 0b01010101010101010101010101010101,
17914 a,
17915 b,
17916 );
17917 let e = _mm512_set_ph(
17918 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
17919 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
17920 );
17921 assert_eq_m512h(r, e);
17922 }
17923
17924 #[simd_test(enable = "avx512fp16")]
17925 unsafe fn test_mm512_maskz_sub_round_ph() {
17926 let a = _mm512_set_ph(
17927 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17928 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17929 31.0, 32.0,
17930 );
17931 let b = _mm512_set_ph(
17932 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17933 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17934 3.0, 2.0, 1.0,
17935 );
17936 let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17937 0b01010101010101010101010101010101,
17938 a,
17939 b,
17940 );
17941 let e = _mm512_set_ph(
17942 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
17943 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
17944 );
17945 assert_eq_m512h(r, e);
17946 }
17947
17948 #[simd_test(enable = "avx512fp16")]
17949 unsafe fn test_mm_sub_round_sh() {
17950 let a = _mm_set_sh(1.0);
17951 let b = _mm_set_sh(2.0);
17952 let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17953 let e = _mm_set_sh(-1.0);
17954 assert_eq_m128h(r, e);
17955 }
17956
17957 #[simd_test(enable = "avx512fp16")]
17958 unsafe fn test_mm_mask_sub_round_sh() {
17959 let a = _mm_set_sh(1.0);
17960 let b = _mm_set_sh(2.0);
17961 let src = _mm_set_sh(4.0);
17962 let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17963 src, 0, a, b,
17964 );
17965 let e = _mm_set_sh(4.0);
17966 assert_eq_m128h(r, e);
17967 let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17968 src, 1, a, b,
17969 );
17970 let e = _mm_set_sh(-1.0);
17971 assert_eq_m128h(r, e);
17972 }
17973
17974 #[simd_test(enable = "avx512fp16")]
17975 unsafe fn test_mm_maskz_sub_round_sh() {
17976 let a = _mm_set_sh(1.0);
17977 let b = _mm_set_sh(2.0);
17978 let r =
17979 _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
17980 let e = _mm_set_sh(0.0);
17981 assert_eq_m128h(r, e);
17982 let r =
17983 _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
17984 let e = _mm_set_sh(-1.0);
17985 assert_eq_m128h(r, e);
17986 }
17987
17988 #[simd_test(enable = "avx512fp16")]
17989 unsafe fn test_mm_sub_sh() {
17990 let a = _mm_set_sh(1.0);
17991 let b = _mm_set_sh(2.0);
17992 let r = _mm_sub_sh(a, b);
17993 let e = _mm_set_sh(-1.0);
17994 assert_eq_m128h(r, e);
17995 }
17996
17997 #[simd_test(enable = "avx512fp16")]
17998 unsafe fn test_mm_mask_sub_sh() {
17999 let a = _mm_set_sh(1.0);
18000 let b = _mm_set_sh(2.0);
18001 let src = _mm_set_sh(4.0);
18002 let r = _mm_mask_sub_sh(src, 0, a, b);
18003 let e = _mm_set_sh(4.0);
18004 assert_eq_m128h(r, e);
18005 let r = _mm_mask_sub_sh(src, 1, a, b);
18006 let e = _mm_set_sh(-1.0);
18007 assert_eq_m128h(r, e);
18008 }
18009
18010 #[simd_test(enable = "avx512fp16")]
18011 unsafe fn test_mm_maskz_sub_sh() {
18012 let a = _mm_set_sh(1.0);
18013 let b = _mm_set_sh(2.0);
18014 let r = _mm_maskz_sub_sh(0, a, b);
18015 let e = _mm_set_sh(0.0);
18016 assert_eq_m128h(r, e);
18017 let r = _mm_maskz_sub_sh(1, a, b);
18018 let e = _mm_set_sh(-1.0);
18019 assert_eq_m128h(r, e);
18020 }
18021
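    // Every product below is an integer no larger than 272; f16 represents integers up to
    // 2048 exactly, so these multiplications are exact and strict equality checks are safe.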
18022 #[simd_test(enable = "avx512fp16,avx512vl")]
18023 unsafe fn test_mm_mul_ph() {
18024 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18025 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18026 let r = _mm_mul_ph(a, b);
18027 let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
18028 assert_eq_m128h(r, e);
18029 }
18030
18031 #[simd_test(enable = "avx512fp16,avx512vl")]
18032 unsafe fn test_mm_mask_mul_ph() {
18033 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18034 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18035 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
18036 let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
18037 let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
18038 assert_eq_m128h(r, e);
18039 }
18040
18041 #[simd_test(enable = "avx512fp16,avx512vl")]
18042 unsafe fn test_mm_maskz_mul_ph() {
18043 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
18044 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
18045 let r = _mm_maskz_mul_ph(0b01010101, a, b);
18046 let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
18047 assert_eq_m128h(r, e);
18048 }
18049
18050 #[simd_test(enable = "avx512fp16,avx512vl")]
18051 unsafe fn test_mm256_mul_ph() {
18052 let a = _mm256_set_ph(
18053 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18054 );
18055 let b = _mm256_set_ph(
18056 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18057 );
18058 let r = _mm256_mul_ph(a, b);
18059 let e = _mm256_set_ph(
18060 16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
18061 30.0, 16.0,
18062 );
18063 assert_eq_m256h(r, e);
18064 }
18065
18066 #[simd_test(enable = "avx512fp16,avx512vl")]
18067 unsafe fn test_mm256_mask_mul_ph() {
18068 let a = _mm256_set_ph(
18069 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18070 );
18071 let b = _mm256_set_ph(
18072 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18073 );
18074 let src = _mm256_set_ph(
18075 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
18076 );
18077 let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
18078 let e = _mm256_set_ph(
18079 18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
18080 );
18081 assert_eq_m256h(r, e);
18082 }
18083
18084 #[simd_test(enable = "avx512fp16,avx512vl")]
18085 unsafe fn test_mm256_maskz_mul_ph() {
18086 let a = _mm256_set_ph(
18087 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18088 );
18089 let b = _mm256_set_ph(
18090 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
18091 );
18092 let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
18093 let e = _mm256_set_ph(
18094 0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
18095 );
18096 assert_eq_m256h(r, e);
18097 }
18098
18099 #[simd_test(enable = "avx512fp16")]
18100 unsafe fn test_mm512_mul_ph() {
18101 let a = _mm512_set_ph(
18102 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18103 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18104 31.0, 32.0,
18105 );
18106 let b = _mm512_set_ph(
18107 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18108 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18109 3.0, 2.0, 1.0,
18110 );
18111 let r = _mm512_mul_ph(a, b);
18112 let e = _mm512_set_ph(
18113 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
18114 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
18115 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
18116 );
18117 assert_eq_m512h(r, e);
18118 }
18119
18120 #[simd_test(enable = "avx512fp16")]
18121 unsafe fn test_mm512_mask_mul_ph() {
18122 let a = _mm512_set_ph(
18123 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18124 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18125 31.0, 32.0,
18126 );
18127 let b = _mm512_set_ph(
18128 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18129 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18130 3.0, 2.0, 1.0,
18131 );
18132 let src = _mm512_set_ph(
18133 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18134 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18135 );
18136 let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
18137 let e = _mm512_set_ph(
18138 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
18139 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
18140 );
18141 assert_eq_m512h(r, e);
18142 }
18143
18144 #[simd_test(enable = "avx512fp16")]
18145 unsafe fn test_mm512_maskz_mul_ph() {
18146 let a = _mm512_set_ph(
18147 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18148 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18149 31.0, 32.0,
18150 );
18151 let b = _mm512_set_ph(
18152 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18153 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18154 3.0, 2.0, 1.0,
18155 );
18156 let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
18157 let e = _mm512_set_ph(
18158 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
18159 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
18160 );
18161 assert_eq_m512h(r, e);
18162 }
18163
18164 #[simd_test(enable = "avx512fp16")]
18165 unsafe fn test_mm512_mul_round_ph() {
18166 let a = _mm512_set_ph(
18167 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18168 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18169 31.0, 32.0,
18170 );
18171 let b = _mm512_set_ph(
18172 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18173 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18174 3.0, 2.0, 1.0,
18175 );
18176 let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18177 let e = _mm512_set_ph(
18178 32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
18179 266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
18180 182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
18181 );
18182 assert_eq_m512h(r, e);
18183 }
18184
18185 #[simd_test(enable = "avx512fp16")]
18186 unsafe fn test_mm512_mask_mul_round_ph() {
18187 let a = _mm512_set_ph(
18188 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18189 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18190 31.0, 32.0,
18191 );
18192 let b = _mm512_set_ph(
18193 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18194 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18195 3.0, 2.0, 1.0,
18196 );
18197 let src = _mm512_set_ph(
18198 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
18199 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
18200 );
18201 let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18202 src,
18203 0b01010101010101010101010101010101,
18204 a,
18205 b,
18206 );
18207 let e = _mm512_set_ph(
18208 34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
18209 50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
18210 );
18211 assert_eq_m512h(r, e);
18212 }
18213
18214 #[simd_test(enable = "avx512fp16")]
18215 unsafe fn test_mm512_maskz_mul_round_ph() {
18216 let a = _mm512_set_ph(
18217 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
18218 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
18219 31.0, 32.0,
18220 );
18221 let b = _mm512_set_ph(
18222 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
18223 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
18224 3.0, 2.0, 1.0,
18225 );
18226 let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18227 0b01010101010101010101010101010101,
18228 a,
18229 b,
18230 );
18231 let e = _mm512_set_ph(
18232 0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
18233 270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
18234 );
18235 assert_eq_m512h(r, e);
18236 }
18237
18238 #[simd_test(enable = "avx512fp16")]
18239 unsafe fn test_mm_mul_round_sh() {
18240 let a = _mm_set_sh(1.0);
18241 let b = _mm_set_sh(2.0);
18242 let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18243 let e = _mm_set_sh(2.0);
18244 assert_eq_m128h(r, e);
18245 }
18246
18247 #[simd_test(enable = "avx512fp16")]
18248 unsafe fn test_mm_mask_mul_round_sh() {
18249 let a = _mm_set_sh(1.0);
18250 let b = _mm_set_sh(2.0);
18251 let src = _mm_set_sh(4.0);
18252 let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18253 src, 0, a, b,
18254 );
18255 let e = _mm_set_sh(4.0);
18256 assert_eq_m128h(r, e);
18257 let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18258 src, 1, a, b,
18259 );
18260 let e = _mm_set_sh(2.0);
18261 assert_eq_m128h(r, e);
18262 }
18263
18264 #[simd_test(enable = "avx512fp16")]
18265 unsafe fn test_mm_maskz_mul_round_sh() {
18266 let a = _mm_set_sh(1.0);
18267 let b = _mm_set_sh(2.0);
18268 let r =
18269 _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18270 let e = _mm_set_sh(0.0);
18271 assert_eq_m128h(r, e);
18272 let r =
18273 _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18274 let e = _mm_set_sh(2.0);
18275 assert_eq_m128h(r, e);
18276 }
18277
18278 #[simd_test(enable = "avx512fp16")]
18279 unsafe fn test_mm_mul_sh() {
18280 let a = _mm_set_sh(1.0);
18281 let b = _mm_set_sh(2.0);
18282 let r = _mm_mul_sh(a, b);
18283 let e = _mm_set_sh(2.0);
18284 assert_eq_m128h(r, e);
18285 }
18286
18287 #[simd_test(enable = "avx512fp16")]
18288 unsafe fn test_mm_mask_mul_sh() {
18289 let a = _mm_set_sh(1.0);
18290 let b = _mm_set_sh(2.0);
18291 let src = _mm_set_sh(4.0);
18292 let r = _mm_mask_mul_sh(src, 0, a, b);
18293 let e = _mm_set_sh(4.0);
18294 assert_eq_m128h(r, e);
18295 let r = _mm_mask_mul_sh(src, 1, a, b);
18296 let e = _mm_set_sh(2.0);
18297 assert_eq_m128h(r, e);
18298 }
18299
18300 #[simd_test(enable = "avx512fp16")]
18301 unsafe fn test_mm_maskz_mul_sh() {
18302 let a = _mm_set_sh(1.0);
18303 let b = _mm_set_sh(2.0);
18304 let r = _mm_maskz_mul_sh(0, a, b);
18305 let e = _mm_set_sh(0.0);
18306 assert_eq_m128h(r, e);
18307 let r = _mm_maskz_mul_sh(1, a, b);
18308 let e = _mm_set_sh(2.0);
18309 assert_eq_m128h(r, e);
18310 }
18311
18312 #[simd_test(enable = "avx512fp16,avx512vl")]
18313 unsafe fn test_mm_div_ph() {
18314 let a = _mm_set1_ph(1.0);
18315 let b = _mm_set1_ph(2.0);
18316 let r = _mm_div_ph(a, b);
18317 let e = _mm_set1_ph(0.5);
18318 assert_eq_m128h(r, e);
18319 }
18320
18321 #[simd_test(enable = "avx512fp16,avx512vl")]
18322 unsafe fn test_mm_mask_div_ph() {
18323 let a = _mm_set1_ph(1.0);
18324 let b = _mm_set1_ph(2.0);
18325 let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
18326 let r = _mm_mask_div_ph(src, 0b01010101, a, b);
18327 let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
18328 assert_eq_m128h(r, e);
18329 }
18330
18331 #[simd_test(enable = "avx512fp16,avx512vl")]
18332 unsafe fn test_mm_maskz_div_ph() {
18333 let a = _mm_set1_ph(1.0);
18334 let b = _mm_set1_ph(2.0);
18335 let r = _mm_maskz_div_ph(0b01010101, a, b);
18336 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
18337 assert_eq_m128h(r, e);
18338 }
18339
18340 #[simd_test(enable = "avx512fp16,avx512vl")]
18341 unsafe fn test_mm256_div_ph() {
18342 let a = _mm256_set1_ph(1.0);
18343 let b = _mm256_set1_ph(2.0);
18344 let r = _mm256_div_ph(a, b);
18345 let e = _mm256_set1_ph(0.5);
18346 assert_eq_m256h(r, e);
18347 }
18348
18349 #[simd_test(enable = "avx512fp16,avx512vl")]
18350 unsafe fn test_mm256_mask_div_ph() {
18351 let a = _mm256_set1_ph(1.0);
18352 let b = _mm256_set1_ph(2.0);
18353 let src = _mm256_set_ph(
18354 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18355 19.0,
18356 );
18357 let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
18358 let e = _mm256_set_ph(
18359 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18360 );
18361 assert_eq_m256h(r, e);
18362 }
18363
18364 #[simd_test(enable = "avx512fp16,avx512vl")]
18365 unsafe fn test_mm256_maskz_div_ph() {
18366 let a = _mm256_set1_ph(1.0);
18367 let b = _mm256_set1_ph(2.0);
18368 let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
18369 let e = _mm256_set_ph(
18370 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18371 );
18372 assert_eq_m256h(r, e);
18373 }
18374
18375 #[simd_test(enable = "avx512fp16")]
18376 unsafe fn test_mm512_div_ph() {
18377 let a = _mm512_set1_ph(1.0);
18378 let b = _mm512_set1_ph(2.0);
18379 let r = _mm512_div_ph(a, b);
18380 let e = _mm512_set1_ph(0.5);
18381 assert_eq_m512h(r, e);
18382 }
18383
18384 #[simd_test(enable = "avx512fp16")]
18385 unsafe fn test_mm512_mask_div_ph() {
18386 let a = _mm512_set1_ph(1.0);
18387 let b = _mm512_set1_ph(2.0);
18388 let src = _mm512_set_ph(
18389 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18390 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
18391 33.0, 34.0, 35.0,
18392 );
18393 let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
18394 let e = _mm512_set_ph(
18395 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18396 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
18397 );
18398 assert_eq_m512h(r, e);
18399 }
18400
18401 #[simd_test(enable = "avx512fp16")]
18402 unsafe fn test_mm512_maskz_div_ph() {
18403 let a = _mm512_set1_ph(1.0);
18404 let b = _mm512_set1_ph(2.0);
18405 let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
18406 let e = _mm512_set_ph(
18407 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
18408 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18409 );
18410 assert_eq_m512h(r, e);
18411 }
18412
18413 #[simd_test(enable = "avx512fp16")]
18414 unsafe fn test_mm512_div_round_ph() {
18415 let a = _mm512_set1_ph(1.0);
18416 let b = _mm512_set1_ph(2.0);
18417 let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18418 let e = _mm512_set1_ph(0.5);
18419 assert_eq_m512h(r, e);
18420 }
18421
18422 #[simd_test(enable = "avx512fp16")]
18423 unsafe fn test_mm512_mask_div_round_ph() {
18424 let a = _mm512_set1_ph(1.0);
18425 let b = _mm512_set1_ph(2.0);
18426 let src = _mm512_set_ph(
18427 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
18428 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
18429 33.0, 34.0, 35.0,
18430 );
18431 let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18432 src,
18433 0b01010101010101010101010101010101,
18434 a,
18435 b,
18436 );
18437 let e = _mm512_set_ph(
18438 4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
18439 20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
18440 );
18441 assert_eq_m512h(r, e);
18442 }
18443
18444 #[simd_test(enable = "avx512fp16")]
18445 unsafe fn test_mm512_maskz_div_round_ph() {
18446 let a = _mm512_set1_ph(1.0);
18447 let b = _mm512_set1_ph(2.0);
18448 let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18449 0b01010101010101010101010101010101,
18450 a,
18451 b,
18452 );
18453 let e = _mm512_set_ph(
18454 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
18455 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
18456 );
18457 assert_eq_m512h(r, e);
18458 }
18459
18460 #[simd_test(enable = "avx512fp16")]
18461 unsafe fn test_mm_div_round_sh() {
18462 let a = _mm_set_sh(1.0);
18463 let b = _mm_set_sh(2.0);
18464 let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18465 let e = _mm_set_sh(0.5);
18466 assert_eq_m128h(r, e);
18467 }
18468
18469 #[simd_test(enable = "avx512fp16")]
18470 unsafe fn test_mm_mask_div_round_sh() {
18471 let a = _mm_set_sh(1.0);
18472 let b = _mm_set_sh(2.0);
18473 let src = _mm_set_sh(4.0);
18474 let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18475 src, 0, a, b,
18476 );
18477 let e = _mm_set_sh(4.0);
18478 assert_eq_m128h(r, e);
18479 let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18480 src, 1, a, b,
18481 );
18482 let e = _mm_set_sh(0.5);
18483 assert_eq_m128h(r, e);
18484 }
18485
18486 #[simd_test(enable = "avx512fp16")]
18487 unsafe fn test_mm_maskz_div_round_sh() {
18488 let a = _mm_set_sh(1.0);
18489 let b = _mm_set_sh(2.0);
18490 let r =
18491 _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18492 let e = _mm_set_sh(0.0);
18493 assert_eq_m128h(r, e);
18494 let r =
18495 _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
18496 let e = _mm_set_sh(0.5);
18497 assert_eq_m128h(r, e);
18498 }
18499
18500 #[simd_test(enable = "avx512fp16")]
18501 unsafe fn test_mm_div_sh() {
18502 let a = _mm_set_sh(1.0);
18503 let b = _mm_set_sh(2.0);
18504 let r = _mm_div_sh(a, b);
18505 let e = _mm_set_sh(0.5);
18506 assert_eq_m128h(r, e);
18507 }
18508
18509 #[simd_test(enable = "avx512fp16")]
18510 unsafe fn test_mm_mask_div_sh() {
18511 let a = _mm_set_sh(1.0);
18512 let b = _mm_set_sh(2.0);
18513 let src = _mm_set_sh(4.0);
18514 let r = _mm_mask_div_sh(src, 0, a, b);
18515 let e = _mm_set_sh(4.0);
18516 assert_eq_m128h(r, e);
18517 let r = _mm_mask_div_sh(src, 1, a, b);
18518 let e = _mm_set_sh(0.5);
18519 assert_eq_m128h(r, e);
18520 }
18521
18522 #[simd_test(enable = "avx512fp16")]
18523 unsafe fn test_mm_maskz_div_sh() {
18524 let a = _mm_set_sh(1.0);
18525 let b = _mm_set_sh(2.0);
18526 let r = _mm_maskz_div_sh(0, a, b);
18527 let e = _mm_set_sh(0.0);
18528 assert_eq_m128h(r, e);
18529 let r = _mm_maskz_div_sh(1, a, b);
18530 let e = _mm_set_sh(0.5);
18531 assert_eq_m128h(r, e);
18532 }
18533
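    // The `_pch` intrinsics treat each adjacent pair of f16 lanes as one complex number
    // (real part in the even lane, imaginary part in the odd lane); `set1_pch(0.0, 1.0)`
    // broadcasts 0 + 1i, and each mask bit covers a whole real/imaginary pair. Since
    // (0 + 1i) * (0 + 1i) = -1 + 0i, every selected pair becomes (-1.0, 0.0).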
18534 #[simd_test(enable = "avx512fp16,avx512vl")]
18535 unsafe fn test_mm_mul_pch() {
18536 let a = _mm_set1_pch(0.0, 1.0);
18537 let b = _mm_set1_pch(0.0, 1.0);
18538 let r = _mm_mul_pch(a, b);
18539 let e = _mm_set1_pch(-1.0, 0.0);
18540 assert_eq_m128h(r, e);
18541 }
18542
18543 #[simd_test(enable = "avx512fp16,avx512vl")]
18544 unsafe fn test_mm_mask_mul_pch() {
18545 let a = _mm_set1_pch(0.0, 1.0);
18546 let b = _mm_set1_pch(0.0, 1.0);
18547 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18548 let r = _mm_mask_mul_pch(src, 0b0101, a, b);
18549 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18550 assert_eq_m128h(r, e);
18551 }
18552
18553 #[simd_test(enable = "avx512fp16,avx512vl")]
18554 unsafe fn test_mm_maskz_mul_pch() {
18555 let a = _mm_set1_pch(0.0, 1.0);
18556 let b = _mm_set1_pch(0.0, 1.0);
18557 let r = _mm_maskz_mul_pch(0b0101, a, b);
18558 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18559 assert_eq_m128h(r, e);
18560 }
18561
18562 #[simd_test(enable = "avx512fp16,avx512vl")]
18563 unsafe fn test_mm256_mul_pch() {
18564 let a = _mm256_set1_pch(0.0, 1.0);
18565 let b = _mm256_set1_pch(0.0, 1.0);
18566 let r = _mm256_mul_pch(a, b);
18567 let e = _mm256_set1_pch(-1.0, 0.0);
18568 assert_eq_m256h(r, e);
18569 }
18570
18571 #[simd_test(enable = "avx512fp16,avx512vl")]
18572 unsafe fn test_mm256_mask_mul_pch() {
18573 let a = _mm256_set1_pch(0.0, 1.0);
18574 let b = _mm256_set1_pch(0.0, 1.0);
18575 let src = _mm256_setr_ph(
18576 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18577 );
18578 let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
18579 let e = _mm256_setr_ph(
18580 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18581 );
18582 assert_eq_m256h(r, e);
18583 }
18584
18585 #[simd_test(enable = "avx512fp16,avx512vl")]
18586 unsafe fn test_mm256_maskz_mul_pch() {
18587 let a = _mm256_set1_pch(0.0, 1.0);
18588 let b = _mm256_set1_pch(0.0, 1.0);
18589 let r = _mm256_maskz_mul_pch(0b01010101, a, b);
18590 let e = _mm256_setr_ph(
18591 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18592 );
18593 assert_eq_m256h(r, e);
18594 }
18595
18596 #[simd_test(enable = "avx512fp16")]
18597 unsafe fn test_mm512_mul_pch() {
18598 let a = _mm512_set1_pch(0.0, 1.0);
18599 let b = _mm512_set1_pch(0.0, 1.0);
18600 let r = _mm512_mul_pch(a, b);
18601 let e = _mm512_set1_pch(-1.0, 0.0);
18602 assert_eq_m512h(r, e);
18603 }
18604
18605 #[simd_test(enable = "avx512fp16")]
18606 unsafe fn test_mm512_mask_mul_pch() {
18607 let a = _mm512_set1_pch(0.0, 1.0);
18608 let b = _mm512_set1_pch(0.0, 1.0);
18609 let src = _mm512_setr_ph(
18610 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18611 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18612 32.0, 33.0,
18613 );
18614 let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
18615 let e = _mm512_setr_ph(
18616 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18617 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18618 33.0,
18619 );
18620 assert_eq_m512h(r, e);
18621 }
18622
18623 #[simd_test(enable = "avx512fp16")]
18624 unsafe fn test_mm512_maskz_mul_pch() {
18625 let a = _mm512_set1_pch(0.0, 1.0);
18626 let b = _mm512_set1_pch(0.0, 1.0);
18627 let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
18628 let e = _mm512_setr_ph(
18629 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18630 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18631 );
18632 assert_eq_m512h(r, e);
18633 }
18634
18635 #[simd_test(enable = "avx512fp16")]
18636 unsafe fn test_mm512_mul_round_pch() {
18637 let a = _mm512_set1_pch(0.0, 1.0);
18638 let b = _mm512_set1_pch(0.0, 1.0);
18639 let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18640 let e = _mm512_set1_pch(-1.0, 0.0);
18641 assert_eq_m512h(r, e);
18642 }
18643
18644 #[simd_test(enable = "avx512fp16")]
18645 unsafe fn test_mm512_mask_mul_round_pch() {
18646 let a = _mm512_set1_pch(0.0, 1.0);
18647 let b = _mm512_set1_pch(0.0, 1.0);
18648 let src = _mm512_setr_ph(
18649 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18650 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18651 32.0, 33.0,
18652 );
18653 let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18654 src,
18655 0b0101010101010101,
18656 a,
18657 b,
18658 );
18659 let e = _mm512_setr_ph(
18660 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18661 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18662 33.0,
18663 );
18664 assert_eq_m512h(r, e);
18665 }
18666
18667 #[simd_test(enable = "avx512fp16")]
18668 unsafe fn test_mm512_maskz_mul_round_pch() {
18669 let a = _mm512_set1_pch(0.0, 1.0);
18670 let b = _mm512_set1_pch(0.0, 1.0);
18671 let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18672 0b0101010101010101,
18673 a,
18674 b,
18675 );
18676 let e = _mm512_setr_ph(
18677 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18678 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18679 );
18680 assert_eq_m512h(r, e);
18681 }
18682
18683 #[simd_test(enable = "avx512fp16")]
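    // The scalar `_sch` intrinsics multiply only the lowest complex pair; the remaining six
    // lanes are copied from `a`, and a clear mask bit takes the low pair from `src` (or
    // zeroes it for `maskz`).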
18684 unsafe fn test_mm_mul_round_sch() {
18685 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18686 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18687 let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18688 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18689 assert_eq_m128h(r, e);
18690 }
18691
18692 #[simd_test(enable = "avx512fp16")]
18693 unsafe fn test_mm_mask_mul_round_sch() {
18694 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18695 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18696 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18697 let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18698 src, 0, a, b,
18699 );
18700 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18701 assert_eq_m128h(r, e);
18702 }
18703
18704 #[simd_test(enable = "avx512fp16")]
18705 unsafe fn test_mm_maskz_mul_round_sch() {
18706 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18707 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18708 let r =
18709 _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18710 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18711 assert_eq_m128h(r, e);
18712 }
18713
18714 #[simd_test(enable = "avx512fp16")]
18715 unsafe fn test_mm_mul_sch() {
18716 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18717 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18718 let r = _mm_mul_sch(a, b);
18719 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18720 assert_eq_m128h(r, e);
18721 }
18722
18723 #[simd_test(enable = "avx512fp16")]
18724 unsafe fn test_mm_mask_mul_sch() {
18725 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18726 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18727 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18728 let r = _mm_mask_mul_sch(src, 0, a, b);
18729 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18730 assert_eq_m128h(r, e);
18731 }
18732
18733 #[simd_test(enable = "avx512fp16")]
18734 unsafe fn test_mm_maskz_mul_sch() {
18735 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18736 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18737 let r = _mm_maskz_mul_sch(0, a, b);
18738 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18739 assert_eq_m128h(r, e);
18740 }
18741
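    // The `fmul_pch` intrinsics compute the same complex product as `mul_pch`, so these
    // tests repeat the scenarios above.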
18742 #[simd_test(enable = "avx512fp16,avx512vl")]
18743 unsafe fn test_mm_fmul_pch() {
18744 let a = _mm_set1_pch(0.0, 1.0);
18745 let b = _mm_set1_pch(0.0, 1.0);
18746 let r = _mm_fmul_pch(a, b);
18747 let e = _mm_set1_pch(-1.0, 0.0);
18748 assert_eq_m128h(r, e);
18749 }
18750
18751 #[simd_test(enable = "avx512fp16,avx512vl")]
18752 unsafe fn test_mm_mask_fmul_pch() {
18753 let a = _mm_set1_pch(0.0, 1.0);
18754 let b = _mm_set1_pch(0.0, 1.0);
18755 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18756 let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
18757 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18758 assert_eq_m128h(r, e);
18759 }
18760
18761 #[simd_test(enable = "avx512fp16,avx512vl")]
18762 unsafe fn test_mm_maskz_fmul_pch() {
18763 let a = _mm_set1_pch(0.0, 1.0);
18764 let b = _mm_set1_pch(0.0, 1.0);
18765 let r = _mm_maskz_fmul_pch(0b0101, a, b);
18766 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18767 assert_eq_m128h(r, e);
18768 }
18769
18770 #[simd_test(enable = "avx512fp16,avx512vl")]
18771 unsafe fn test_mm256_fmul_pch() {
18772 let a = _mm256_set1_pch(0.0, 1.0);
18773 let b = _mm256_set1_pch(0.0, 1.0);
18774 let r = _mm256_fmul_pch(a, b);
18775 let e = _mm256_set1_pch(-1.0, 0.0);
18776 assert_eq_m256h(r, e);
18777 }
18778
18779 #[simd_test(enable = "avx512fp16,avx512vl")]
18780 unsafe fn test_mm256_mask_fmul_pch() {
18781 let a = _mm256_set1_pch(0.0, 1.0);
18782 let b = _mm256_set1_pch(0.0, 1.0);
18783 let src = _mm256_setr_ph(
18784 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18785 );
18786 let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
18787 let e = _mm256_setr_ph(
18788 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18789 );
18790 assert_eq_m256h(r, e);
18791 }
18792
18793 #[simd_test(enable = "avx512fp16,avx512vl")]
18794 unsafe fn test_mm256_maskz_fmul_pch() {
18795 let a = _mm256_set1_pch(0.0, 1.0);
18796 let b = _mm256_set1_pch(0.0, 1.0);
18797 let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
18798 let e = _mm256_setr_ph(
18799 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18800 );
18801 assert_eq_m256h(r, e);
18802 }
18803
18804 #[simd_test(enable = "avx512fp16")]
18805 unsafe fn test_mm512_fmul_pch() {
18806 let a = _mm512_set1_pch(0.0, 1.0);
18807 let b = _mm512_set1_pch(0.0, 1.0);
18808 let r = _mm512_fmul_pch(a, b);
18809 let e = _mm512_set1_pch(-1.0, 0.0);
18810 assert_eq_m512h(r, e);
18811 }
18812
18813 #[simd_test(enable = "avx512fp16")]
18814 unsafe fn test_mm512_mask_fmul_pch() {
18815 let a = _mm512_set1_pch(0.0, 1.0);
18816 let b = _mm512_set1_pch(0.0, 1.0);
18817 let src = _mm512_setr_ph(
18818 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18819 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18820 32.0, 33.0,
18821 );
18822 let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
18823 let e = _mm512_setr_ph(
18824 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18825 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18826 33.0,
18827 );
18828 assert_eq_m512h(r, e);
18829 }
18830
18831 #[simd_test(enable = "avx512fp16")]
18832 unsafe fn test_mm512_maskz_fmul_pch() {
18833 let a = _mm512_set1_pch(0.0, 1.0);
18834 let b = _mm512_set1_pch(0.0, 1.0);
18835 let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
18836 let e = _mm512_setr_ph(
18837 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18838 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18839 );
18840 assert_eq_m512h(r, e);
18841 }
18842
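    // The _round variants take the rounding control as a const generic; these tests use
    // round-to-nearest with floating-point exceptions suppressed, so the expected values
    // match the non-rounding forms above.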
18843 #[simd_test(enable = "avx512fp16")]
18844 unsafe fn test_mm512_fmul_round_pch() {
18845 let a = _mm512_set1_pch(0.0, 1.0);
18846 let b = _mm512_set1_pch(0.0, 1.0);
18847 let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18848 let e = _mm512_set1_pch(-1.0, 0.0);
18849 assert_eq_m512h(r, e);
18850 }
18851
18852 #[simd_test(enable = "avx512fp16")]
18853 unsafe fn test_mm512_mask_fmul_round_pch() {
18854 let a = _mm512_set1_pch(0.0, 1.0);
18855 let b = _mm512_set1_pch(0.0, 1.0);
18856 let src = _mm512_setr_ph(
18857 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18858 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
18859 32.0, 33.0,
18860 );
18861 let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18862 src,
18863 0b0101010101010101,
18864 a,
18865 b,
18866 );
18867 let e = _mm512_setr_ph(
18868 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18869 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
18870 33.0,
18871 );
18872 assert_eq_m512h(r, e);
18873 }
18874
18875 #[simd_test(enable = "avx512fp16")]
18876 unsafe fn test_mm512_maskz_fmul_round_pch() {
18877 let a = _mm512_set1_pch(0.0, 1.0);
18878 let b = _mm512_set1_pch(0.0, 1.0);
18879 let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18880 0b0101010101010101,
18881 a,
18882 b,
18883 );
18884 let e = _mm512_setr_ph(
18885 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18886 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18887 );
18888 assert_eq_m512h(r, e);
18889 }
18890
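    // Scalar complex multiply: only the lowest complex pair of a and b is multiplied;
    // elements 2..7 of the result are copied from a. With a zeroed mask bit the lower
    // pair comes from src (mask) or is zeroed (maskz).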
18891 #[simd_test(enable = "avx512fp16")]
18892 unsafe fn test_mm_fmul_round_sch() {
18893 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18894 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18895 let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
18896 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18897 assert_eq_m128h(r, e);
18898 }
18899
18900 #[simd_test(enable = "avx512fp16")]
18901 unsafe fn test_mm_mask_fmul_round_sch() {
18902 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18903 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18904 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18905 let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
18906 src, 0, a, b,
18907 );
18908 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18909 assert_eq_m128h(r, e);
18910 }
18911
18912 #[simd_test(enable = "avx512fp16")]
18913 unsafe fn test_mm_maskz_fmul_round_sch() {
18914 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18915 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18916 let r =
18917 _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
18918 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18919 assert_eq_m128h(r, e);
18920 }
18921
18922 #[simd_test(enable = "avx512fp16")]
18923 unsafe fn test_mm_fmul_sch() {
18924 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18925 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18926 let r = _mm_fmul_sch(a, b);
18927 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18928 assert_eq_m128h(r, e);
18929 }
18930
18931 #[simd_test(enable = "avx512fp16")]
18932 unsafe fn test_mm_mask_fmul_sch() {
18933 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18934 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18935 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
18936 let r = _mm_mask_fmul_sch(src, 0, a, b);
18937 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18938 assert_eq_m128h(r, e);
18939 }
18940
18941 #[simd_test(enable = "avx512fp16")]
18942 unsafe fn test_mm_maskz_fmul_sch() {
18943 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18944 let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
18945 let r = _mm_maskz_fmul_sch(0, a, b);
18946 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
18947 assert_eq_m128h(r, e);
18948 }
18949
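    // cmul_pch multiplies a by the complex conjugate of b:
    // (0 + 1i) * conj(0 - 1i) = (0 + 1i) * (0 + 1i) = -1 + 0i.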
18950 #[simd_test(enable = "avx512fp16,avx512vl")]
18951 unsafe fn test_mm_cmul_pch() {
18952 let a = _mm_set1_pch(0.0, 1.0);
18953 let b = _mm_set1_pch(0.0, -1.0);
18954 let r = _mm_cmul_pch(a, b);
18955 let e = _mm_set1_pch(-1.0, 0.0);
18956 assert_eq_m128h(r, e);
18957 }
18958
18959 #[simd_test(enable = "avx512fp16,avx512vl")]
18960 unsafe fn test_mm_mask_cmul_pch() {
18961 let a = _mm_set1_pch(0.0, 1.0);
18962 let b = _mm_set1_pch(0.0, -1.0);
18963 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18964 let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
18965 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18966 assert_eq_m128h(r, e);
18967 }
18968
18969 #[simd_test(enable = "avx512fp16,avx512vl")]
18970 unsafe fn test_mm_maskz_cmul_pch() {
18971 let a = _mm_set1_pch(0.0, 1.0);
18972 let b = _mm_set1_pch(0.0, -1.0);
18973 let r = _mm_maskz_cmul_pch(0b0101, a, b);
18974 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18975 assert_eq_m128h(r, e);
18976 }
18977
18978 #[simd_test(enable = "avx512fp16,avx512vl")]
18979 unsafe fn test_mm256_cmul_pch() {
18980 let a = _mm256_set1_pch(0.0, 1.0);
18981 let b = _mm256_set1_pch(0.0, -1.0);
18982 let r = _mm256_cmul_pch(a, b);
18983 let e = _mm256_set1_pch(-1.0, 0.0);
18984 assert_eq_m256h(r, e);
18985 }
18986
18987 #[simd_test(enable = "avx512fp16,avx512vl")]
18988 unsafe fn test_mm256_mask_cmul_pch() {
18989 let a = _mm256_set1_pch(0.0, 1.0);
18990 let b = _mm256_set1_pch(0.0, -1.0);
18991 let src = _mm256_setr_ph(
18992 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18993 );
18994 let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
18995 let e = _mm256_setr_ph(
18996 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18997 );
18998 assert_eq_m256h(r, e);
18999 }
19000
19001 #[simd_test(enable = "avx512fp16,avx512vl")]
19002 unsafe fn test_mm256_maskz_cmul_pch() {
19003 let a = _mm256_set1_pch(0.0, 1.0);
19004 let b = _mm256_set1_pch(0.0, -1.0);
19005 let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
19006 let e = _mm256_setr_ph(
19007 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19008 );
19009 assert_eq_m256h(r, e);
19010 }
19011
19012 #[simd_test(enable = "avx512fp16")]
19013 unsafe fn test_mm512_cmul_pch() {
19014 let a = _mm512_set1_pch(0.0, 1.0);
19015 let b = _mm512_set1_pch(0.0, -1.0);
19016 let r = _mm512_cmul_pch(a, b);
19017 let e = _mm512_set1_pch(-1.0, 0.0);
19018 assert_eq_m512h(r, e);
19019 }
19020
19021 #[simd_test(enable = "avx512fp16")]
19022 unsafe fn test_mm512_mask_cmul_pch() {
19023 let a = _mm512_set1_pch(0.0, 1.0);
19024 let b = _mm512_set1_pch(0.0, -1.0);
19025 let src = _mm512_setr_ph(
19026 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19027 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19028 32.0, 33.0,
19029 );
19030 let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
19031 let e = _mm512_setr_ph(
19032 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19033 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19034 33.0,
19035 );
19036 assert_eq_m512h(r, e);
19037 }
19038
19039 #[simd_test(enable = "avx512fp16")]
19040 unsafe fn test_mm512_maskz_cmul_pch() {
19041 let a = _mm512_set1_pch(0.0, 1.0);
19042 let b = _mm512_set1_pch(0.0, -1.0);
19043 let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
19044 let e = _mm512_setr_ph(
19045 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19046 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19047 );
19048 assert_eq_m512h(r, e);
19049 }
19050
19051 #[simd_test(enable = "avx512fp16")]
19052 unsafe fn test_mm512_cmul_round_pch() {
19053 let a = _mm512_set1_pch(0.0, 1.0);
19054 let b = _mm512_set1_pch(0.0, -1.0);
19055 let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19056 let e = _mm512_set1_pch(-1.0, 0.0);
19057 assert_eq_m512h(r, e);
19058 }
19059
19060 #[simd_test(enable = "avx512fp16")]
19061 unsafe fn test_mm512_mask_cmul_round_pch() {
19062 let a = _mm512_set1_pch(0.0, 1.0);
19063 let b = _mm512_set1_pch(0.0, -1.0);
19064 let src = _mm512_setr_ph(
19065 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19066 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19067 32.0, 33.0,
19068 );
19069 let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19070 src,
19071 0b0101010101010101,
19072 a,
19073 b,
19074 );
19075 let e = _mm512_setr_ph(
19076 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19077 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19078 33.0,
19079 );
19080 assert_eq_m512h(r, e);
19081 }
19082
19083 #[simd_test(enable = "avx512fp16")]
19084 unsafe fn test_mm512_maskz_cmul_round_pch() {
19085 let a = _mm512_set1_pch(0.0, 1.0);
19086 let b = _mm512_set1_pch(0.0, -1.0);
19087 let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19088 0b0101010101010101,
19089 a,
19090 b,
19091 );
19092 let e = _mm512_setr_ph(
19093 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19094 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19095 );
19096 assert_eq_m512h(r, e);
19097 }
19098
19099 #[simd_test(enable = "avx512fp16")]
19100 unsafe fn test_mm_cmul_sch() {
19101 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19102 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19103 let r = _mm_cmul_sch(a, b);
19104 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19105 assert_eq_m128h(r, e);
19106 }
19107
19108 #[simd_test(enable = "avx512fp16")]
19109 unsafe fn test_mm_mask_cmul_sch() {
19110 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19111 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19112 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19113 let r = _mm_mask_cmul_sch(src, 0, a, b);
19114 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19115 assert_eq_m128h(r, e);
19116 }
19117
19118 #[simd_test(enable = "avx512fp16")]
19119 unsafe fn test_mm_maskz_cmul_sch() {
19120 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19121 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19122 let r = _mm_maskz_cmul_sch(0, a, b);
19123 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19124 assert_eq_m128h(r, e);
19125 }
19126
19127 #[simd_test(enable = "avx512fp16")]
19128 unsafe fn test_mm_cmul_round_sch() {
19129 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19130 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19131 let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19132 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19133 assert_eq_m128h(r, e);
19134 }
19135
19136 #[simd_test(enable = "avx512fp16")]
19137 unsafe fn test_mm_mask_cmul_round_sch() {
19138 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19139 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19140 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19141 let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19142 src, 0, a, b,
19143 );
19144 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19145 assert_eq_m128h(r, e);
19146 }
19147
19148 #[simd_test(enable = "avx512fp16")]
19149 unsafe fn test_mm_maskz_cmul_round_sch() {
19150 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19151 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19152 let r =
19153 _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19154 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19155 assert_eq_m128h(r, e);
19156 }
19157
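    // fcmul_pch performs the same a * conj(b) multiplication as cmul_pch, so these
    // tests mirror the cmul_pch ones above.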
19158 #[simd_test(enable = "avx512fp16,avx512vl")]
19159 unsafe fn test_mm_fcmul_pch() {
19160 let a = _mm_set1_pch(0.0, 1.0);
19161 let b = _mm_set1_pch(0.0, -1.0);
19162 let r = _mm_fcmul_pch(a, b);
19163 let e = _mm_set1_pch(-1.0, 0.0);
19164 assert_eq_m128h(r, e);
19165 }
19166
19167 #[simd_test(enable = "avx512fp16,avx512vl")]
19168 unsafe fn test_mm_mask_fcmul_pch() {
19169 let a = _mm_set1_pch(0.0, 1.0);
19170 let b = _mm_set1_pch(0.0, -1.0);
19171 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19172 let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
19173 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
19174 assert_eq_m128h(r, e);
19175 }
19176
19177 #[simd_test(enable = "avx512fp16,avx512vl")]
19178 unsafe fn test_mm_maskz_fcmul_pch() {
19179 let a = _mm_set1_pch(0.0, 1.0);
19180 let b = _mm_set1_pch(0.0, -1.0);
19181 let r = _mm_maskz_fcmul_pch(0b0101, a, b);
19182 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
19183 assert_eq_m128h(r, e);
19184 }
19185
19186 #[simd_test(enable = "avx512fp16,avx512vl")]
19187 unsafe fn test_mm256_fcmul_pch() {
19188 let a = _mm256_set1_pch(0.0, 1.0);
19189 let b = _mm256_set1_pch(0.0, -1.0);
19190 let r = _mm256_fcmul_pch(a, b);
19191 let e = _mm256_set1_pch(-1.0, 0.0);
19192 assert_eq_m256h(r, e);
19193 }
19194
19195 #[simd_test(enable = "avx512fp16,avx512vl")]
19196 unsafe fn test_mm256_mask_fcmul_pch() {
19197 let a = _mm256_set1_pch(0.0, 1.0);
19198 let b = _mm256_set1_pch(0.0, -1.0);
19199 let src = _mm256_setr_ph(
19200 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19201 );
19202 let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
19203 let e = _mm256_setr_ph(
19204 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19205 );
19206 assert_eq_m256h(r, e);
19207 }
19208
19209 #[simd_test(enable = "avx512fp16,avx512vl")]
19210 unsafe fn test_mm256_maskz_fcmul_pch() {
19211 let a = _mm256_set1_pch(0.0, 1.0);
19212 let b = _mm256_set1_pch(0.0, -1.0);
19213 let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
19214 let e = _mm256_setr_ph(
19215 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19216 );
19217 assert_eq_m256h(r, e);
19218 }
19219
19220 #[simd_test(enable = "avx512fp16")]
19221 unsafe fn test_mm512_fcmul_pch() {
19222 let a = _mm512_set1_pch(0.0, 1.0);
19223 let b = _mm512_set1_pch(0.0, -1.0);
19224 let r = _mm512_fcmul_pch(a, b);
19225 let e = _mm512_set1_pch(-1.0, 0.0);
19226 assert_eq_m512h(r, e);
19227 }
19228
19229 #[simd_test(enable = "avx512fp16")]
19230 unsafe fn test_mm512_mask_fcmul_pch() {
19231 let a = _mm512_set1_pch(0.0, 1.0);
19232 let b = _mm512_set1_pch(0.0, -1.0);
19233 let src = _mm512_setr_ph(
19234 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19235 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19236 32.0, 33.0,
19237 );
19238 let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
19239 let e = _mm512_setr_ph(
19240 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19241 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19242 33.0,
19243 );
19244 assert_eq_m512h(r, e);
19245 }
19246
19247 #[simd_test(enable = "avx512fp16")]
19248 unsafe fn test_mm512_maskz_fcmul_pch() {
19249 let a = _mm512_set1_pch(0.0, 1.0);
19250 let b = _mm512_set1_pch(0.0, -1.0);
19251 let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
19252 let e = _mm512_setr_ph(
19253 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19254 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19255 );
19256 assert_eq_m512h(r, e);
19257 }
19258
19259 #[simd_test(enable = "avx512fp16")]
19260 unsafe fn test_mm512_fcmul_round_pch() {
19261 let a = _mm512_set1_pch(0.0, 1.0);
19262 let b = _mm512_set1_pch(0.0, -1.0);
19263 let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19264 let e = _mm512_set1_pch(-1.0, 0.0);
19265 assert_eq_m512h(r, e);
19266 }
19267
19268 #[simd_test(enable = "avx512fp16")]
19269 unsafe fn test_mm512_mask_fcmul_round_pch() {
19270 let a = _mm512_set1_pch(0.0, 1.0);
19271 let b = _mm512_set1_pch(0.0, -1.0);
19272 let src = _mm512_setr_ph(
19273 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19274 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19275 32.0, 33.0,
19276 );
19277 let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19278 src,
19279 0b0101010101010101,
19280 a,
19281 b,
19282 );
19283 let e = _mm512_setr_ph(
19284 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
19285 -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
19286 33.0,
19287 );
19288 assert_eq_m512h(r, e);
19289 }
19290
19291 #[simd_test(enable = "avx512fp16")]
19292 unsafe fn test_mm512_maskz_fcmul_round_pch() {
19293 let a = _mm512_set1_pch(0.0, 1.0);
19294 let b = _mm512_set1_pch(0.0, -1.0);
19295 let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19296 0b0101010101010101,
19297 a,
19298 b,
19299 );
19300 let e = _mm512_setr_ph(
19301 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19302 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
19303 );
19304 assert_eq_m512h(r, e);
19305 }
19306
19307 #[simd_test(enable = "avx512fp16")]
19308 unsafe fn test_mm_fcmul_sch() {
19309 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19310 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19311 let r = _mm_fcmul_sch(a, b);
19312 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19313 assert_eq_m128h(r, e);
19314 }
19315
19316 #[simd_test(enable = "avx512fp16")]
19317 unsafe fn test_mm_mask_fcmul_sch() {
19318 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19319 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19320 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19321 let r = _mm_mask_fcmul_sch(src, 0, a, b);
19322 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19323 assert_eq_m128h(r, e);
19324 }
19325
19326 #[simd_test(enable = "avx512fp16")]
19327 unsafe fn test_mm_maskz_fcmul_sch() {
19328 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19329 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19330 let r = _mm_maskz_fcmul_sch(0, a, b);
19331 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19332 assert_eq_m128h(r, e);
19333 }
19334
19335 #[simd_test(enable = "avx512fp16")]
19336 unsafe fn test_mm_fcmul_round_sch() {
19337 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19338 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19339 let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
19340 let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19341 assert_eq_m128h(r, e);
19342 }
19343
19344 #[simd_test(enable = "avx512fp16")]
19345 unsafe fn test_mm_mask_fcmul_round_sch() {
19346 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19347 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19348 let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
19349 let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19350 src, 0, a, b,
19351 );
19352 let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19353 assert_eq_m128h(r, e);
19354 }
19355
19356 #[simd_test(enable = "avx512fp16")]
19357 unsafe fn test_mm_maskz_fcmul_round_sch() {
19358 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19359 let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
19360 let r =
19361 _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
19362 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19363 assert_eq_m128h(r, e);
19364 }
19365
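    // abs_ph clears the sign bit of every half-precision element.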
19366 #[simd_test(enable = "avx512fp16,avx512vl")]
19367 unsafe fn test_mm_abs_ph() {
19368 let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
19369 let r = _mm_abs_ph(a);
19370 let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
19371 assert_eq_m128h(r, e);
19372 }
19373
19374 #[simd_test(enable = "avx512fp16,avx512vl")]
19375 unsafe fn test_mm256_abs_ph() {
19376 let a = _mm256_set_ph(
19377 -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
19378 -14.0,
19379 );
19380 let r = _mm256_abs_ph(a);
19381 let e = _mm256_set_ph(
19382 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
19383 );
19384 assert_eq_m256h(r, e);
19385 }
19386
19387 #[simd_test(enable = "avx512fp16")]
19388 unsafe fn test_mm512_abs_ph() {
19389 let a = _mm512_set_ph(
19390 -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
19391 -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
19392 27.0, -28.0, 29.0, -30.0,
19393 );
19394 let r = _mm512_abs_ph(a);
19395 let e = _mm512_set_ph(
19396 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
19397 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
19398 29.0, 30.0,
19399 );
19400 assert_eq_m512h(r, e);
19401 }
19402
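    // conj_pch negates the imaginary (odd-indexed) component of each complex element,
    // turning 0 + 1i into 0 - 1i.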
19403 #[simd_test(enable = "avx512fp16,avx512vl")]
19404 unsafe fn test_mm_conj_pch() {
19405 let a = _mm_set1_pch(0.0, 1.0);
19406 let r = _mm_conj_pch(a);
19407 let e = _mm_set1_pch(0.0, -1.0);
19408 assert_eq_m128h(r, e);
19409 }
19410
19411 #[simd_test(enable = "avx512fp16,avx512vl")]
19412 unsafe fn test_mm_mask_conj_pch() {
19413 let a = _mm_set1_pch(0.0, 1.0);
19414 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
19415 let r = _mm_mask_conj_pch(src, 0b0101, a);
19416 let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
19417 assert_eq_m128h(r, e);
19418 }
19419
19420 #[simd_test(enable = "avx512fp16,avx512vl")]
19421 unsafe fn test_mm_maskz_conj_pch() {
19422 let a = _mm_set1_pch(0.0, 1.0);
19423 let r = _mm_maskz_conj_pch(0b0101, a);
19424 let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
19425 assert_eq_m128h(r, e);
19426 }
19427
19428 #[simd_test(enable = "avx512fp16,avx512vl")]
19429 unsafe fn test_mm256_conj_pch() {
19430 let a = _mm256_set1_pch(0.0, 1.0);
19431 let r = _mm256_conj_pch(a);
19432 let e = _mm256_set1_pch(0.0, -1.0);
19433 assert_eq_m256h(r, e);
19434 }
19435
19436 #[simd_test(enable = "avx512fp16,avx512vl")]
19437 unsafe fn test_mm256_mask_conj_pch() {
19438 let a = _mm256_set1_pch(0.0, 1.0);
19439 let src = _mm256_setr_ph(
19440 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19441 );
19442 let r = _mm256_mask_conj_pch(src, 0b01010101, a);
19443 let e = _mm256_setr_ph(
19444 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
19445 );
19446 assert_eq_m256h(r, e);
19447 }
19448
19449 #[simd_test(enable = "avx512fp16,avx512vl")]
19450 unsafe fn test_mm256_maskz_conj_pch() {
19451 let a = _mm256_set1_pch(0.0, 1.0);
19452 let r = _mm256_maskz_conj_pch(0b01010101, a);
19453 let e = _mm256_setr_ph(
19454 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19455 );
19456 assert_eq_m256h(r, e);
19457 }
19458
19459 #[simd_test(enable = "avx512fp16")]
19460 unsafe fn test_mm512_conj_pch() {
19461 let a = _mm512_set1_pch(0.0, 1.0);
19462 let r = _mm512_conj_pch(a);
19463 let e = _mm512_set1_pch(0.0, -1.0);
19464 assert_eq_m512h(r, e);
19465 }
19466
19467 #[simd_test(enable = "avx512fp16")]
19468 unsafe fn test_mm512_mask_conj_pch() {
19469 let a = _mm512_set1_pch(0.0, 1.0);
19470 let src = _mm512_setr_ph(
19471 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
19472 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
19473 32.0, 33.0,
19474 );
19475 let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
19476 let e = _mm512_setr_ph(
19477 0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
19478 0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
19479 33.0,
19480 );
19481 assert_eq_m512h(r, e);
19482 }
19483
19484 #[simd_test(enable = "avx512fp16")]
19485 unsafe fn test_mm512_maskz_conj_pch() {
19486 let a = _mm512_set1_pch(0.0, 1.0);
19487 let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
19488 let e = _mm512_setr_ph(
19489 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19490 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
19491 );
19492 assert_eq_m512h(r, e);
19493 }
19494
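    // fmadd_pch computes the complex fused multiply-add a * b + c:
    // (0 + 1i) * (0 + 2i) + (0 + 3i) = -2 + 3i. Unselected complex pairs keep a (mask),
    // keep c (mask3), or are zeroed (maskz).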
19495 #[simd_test(enable = "avx512fp16,avx512vl")]
19496 unsafe fn test_mm_fmadd_pch() {
19497 let a = _mm_set1_pch(0.0, 1.0);
19498 let b = _mm_set1_pch(0.0, 2.0);
19499 let c = _mm_set1_pch(0.0, 3.0);
19500 let r = _mm_fmadd_pch(a, b, c);
19501 let e = _mm_set1_pch(-2.0, 3.0);
19502 assert_eq_m128h(r, e);
19503 }
19504
19505 #[simd_test(enable = "avx512fp16,avx512vl")]
19506 unsafe fn test_mm_mask_fmadd_pch() {
19507 let a = _mm_set1_pch(0.0, 1.0);
19508 let b = _mm_set1_pch(0.0, 2.0);
19509 let c = _mm_set1_pch(0.0, 3.0);
19510 let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
19511 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
19512 assert_eq_m128h(r, e);
19513 }
19514
19515 #[simd_test(enable = "avx512fp16,avx512vl")]
19516 unsafe fn test_mm_mask3_fmadd_pch() {
19517 let a = _mm_set1_pch(0.0, 1.0);
19518 let b = _mm_set1_pch(0.0, 2.0);
19519 let c = _mm_set1_pch(0.0, 3.0);
19520 let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
19521 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
19522 assert_eq_m128h(r, e);
19523 }
19524
19525 #[simd_test(enable = "avx512fp16,avx512vl")]
19526 unsafe fn test_mm_maskz_fmadd_pch() {
19527 let a = _mm_set1_pch(0.0, 1.0);
19528 let b = _mm_set1_pch(0.0, 2.0);
19529 let c = _mm_set1_pch(0.0, 3.0);
19530 let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
19531 let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
19532 assert_eq_m128h(r, e);
19533 }
19534
19535 #[simd_test(enable = "avx512fp16,avx512vl")]
19536 unsafe fn test_mm256_fmadd_pch() {
19537 let a = _mm256_set1_pch(0.0, 1.0);
19538 let b = _mm256_set1_pch(0.0, 2.0);
19539 let c = _mm256_set1_pch(0.0, 3.0);
19540 let r = _mm256_fmadd_pch(a, b, c);
19541 let e = _mm256_set1_pch(-2.0, 3.0);
19542 assert_eq_m256h(r, e);
19543 }
19544
19545 #[simd_test(enable = "avx512fp16,avx512vl")]
19546 unsafe fn test_mm256_mask_fmadd_pch() {
19547 let a = _mm256_set1_pch(0.0, 1.0);
19548 let b = _mm256_set1_pch(0.0, 2.0);
19549 let c = _mm256_set1_pch(0.0, 3.0);
19550 let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
19551 let e = _mm256_setr_ph(
19552 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19553 );
19554 assert_eq_m256h(r, e);
19555 }
19556
19557 #[simd_test(enable = "avx512fp16,avx512vl")]
19558 unsafe fn test_mm256_mask3_fmadd_pch() {
19559 let a = _mm256_set1_pch(0.0, 1.0);
19560 let b = _mm256_set1_pch(0.0, 2.0);
19561 let c = _mm256_set1_pch(0.0, 3.0);
19562 let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
19563 let e = _mm256_setr_ph(
19564 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19565 );
19566 assert_eq_m256h(r, e);
19567 }
19568
19569 #[simd_test(enable = "avx512fp16,avx512vl")]
19570 unsafe fn test_mm256_maskz_fmadd_pch() {
19571 let a = _mm256_set1_pch(0.0, 1.0);
19572 let b = _mm256_set1_pch(0.0, 2.0);
19573 let c = _mm256_set1_pch(0.0, 3.0);
19574 let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
19575 let e = _mm256_setr_ph(
19576 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19577 );
19578 assert_eq_m256h(r, e);
19579 }
19580
19581 #[simd_test(enable = "avx512fp16")]
19582 unsafe fn test_mm512_fmadd_pch() {
19583 let a = _mm512_set1_pch(0.0, 1.0);
19584 let b = _mm512_set1_pch(0.0, 2.0);
19585 let c = _mm512_set1_pch(0.0, 3.0);
19586 let r = _mm512_fmadd_pch(a, b, c);
19587 let e = _mm512_set1_pch(-2.0, 3.0);
19588 assert_eq_m512h(r, e);
19589 }
19590
19591 #[simd_test(enable = "avx512fp16")]
19592 unsafe fn test_mm512_mask_fmadd_pch() {
19593 let a = _mm512_set1_pch(0.0, 1.0);
19594 let b = _mm512_set1_pch(0.0, 2.0);
19595 let c = _mm512_set1_pch(0.0, 3.0);
19596 let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
19597 let e = _mm512_setr_ph(
19598 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19599 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19600 );
19601 assert_eq_m512h(r, e);
19602 }
19603
19604 #[simd_test(enable = "avx512fp16")]
19605 unsafe fn test_mm512_mask3_fmadd_pch() {
19606 let a = _mm512_set1_pch(0.0, 1.0);
19607 let b = _mm512_set1_pch(0.0, 2.0);
19608 let c = _mm512_set1_pch(0.0, 3.0);
19609 let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
19610 let e = _mm512_setr_ph(
19611 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19612 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19613 );
19614 assert_eq_m512h(r, e);
19615 }
19616
19617 #[simd_test(enable = "avx512fp16")]
19618 unsafe fn test_mm512_maskz_fmadd_pch() {
19619 let a = _mm512_set1_pch(0.0, 1.0);
19620 let b = _mm512_set1_pch(0.0, 2.0);
19621 let c = _mm512_set1_pch(0.0, 3.0);
19622 let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
19623 let e = _mm512_setr_ph(
19624 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19625 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19626 );
19627 assert_eq_m512h(r, e);
19628 }
19629
19630 #[simd_test(enable = "avx512fp16")]
19631 unsafe fn test_mm512_fmadd_round_pch() {
19632 let a = _mm512_set1_pch(0.0, 1.0);
19633 let b = _mm512_set1_pch(0.0, 2.0);
19634 let c = _mm512_set1_pch(0.0, 3.0);
19635 let r =
19636 _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19637 let e = _mm512_set1_pch(-2.0, 3.0);
19638 assert_eq_m512h(r, e);
19639 }
19640
19641 #[simd_test(enable = "avx512fp16")]
19642 unsafe fn test_mm512_mask_fmadd_round_pch() {
19643 let a = _mm512_set1_pch(0.0, 1.0);
19644 let b = _mm512_set1_pch(0.0, 2.0);
19645 let c = _mm512_set1_pch(0.0, 3.0);
19646 let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19647 a,
19648 0b0101010101010101,
19649 b,
19650 c,
19651 );
19652 let e = _mm512_setr_ph(
19653 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19654 -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
19655 );
19656 assert_eq_m512h(r, e);
19657 }
19658
19659 #[simd_test(enable = "avx512fp16")]
19660 unsafe fn test_mm512_mask3_fmadd_round_pch() {
19661 let a = _mm512_set1_pch(0.0, 1.0);
19662 let b = _mm512_set1_pch(0.0, 2.0);
19663 let c = _mm512_set1_pch(0.0, 3.0);
19664 let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19665 a,
19666 b,
19667 c,
19668 0b0101010101010101,
19669 );
19670 let e = _mm512_setr_ph(
19671 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19672 -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
19673 );
19674 assert_eq_m512h(r, e);
19675 }
19676
19677 #[simd_test(enable = "avx512fp16")]
19678 unsafe fn test_mm512_maskz_fmadd_round_pch() {
19679 let a = _mm512_set1_pch(0.0, 1.0);
19680 let b = _mm512_set1_pch(0.0, 2.0);
19681 let c = _mm512_set1_pch(0.0, 3.0);
19682 let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19683 0b0101010101010101,
19684 a,
19685 b,
19686 c,
19687 );
19688 let e = _mm512_setr_ph(
19689 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19690 -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
19691 );
19692 assert_eq_m512h(r, e);
19693 }
19694
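    // Scalar complex fused multiply-add: only the lowest complex pair is computed;
    // the upper six elements are copied from a (or from c in the mask3 form).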
19695 #[simd_test(enable = "avx512fp16")]
19696 unsafe fn test_mm_fmadd_sch() {
19697 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19698 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19699 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19700 let r = _mm_fmadd_sch(a, b, c);
19701 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19702 assert_eq_m128h(r, e);
19703 }
19704
19705 #[simd_test(enable = "avx512fp16")]
19706 unsafe fn test_mm_mask_fmadd_sch() {
19707 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19708 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19709 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19710 let r = _mm_mask_fmadd_sch(a, 0, b, c);
19711 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19712 assert_eq_m128h(r, e);
19713 let r = _mm_mask_fmadd_sch(a, 1, b, c);
19714 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19715 assert_eq_m128h(r, e);
19716 }
19717
19718 #[simd_test(enable = "avx512fp16")]
19719 unsafe fn test_mm_mask3_fmadd_sch() {
19720 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19721 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19722 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19723 let r = _mm_mask3_fmadd_sch(a, b, c, 0);
19724 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19725 assert_eq_m128h(r, e);
19726 let r = _mm_mask3_fmadd_sch(a, b, c, 1);
19727 let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19728 assert_eq_m128h(r, e);
19729 }
19730
19731 #[simd_test(enable = "avx512fp16")]
19732 unsafe fn test_mm_maskz_fmadd_sch() {
19733 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19734 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19735 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19736 let r = _mm_maskz_fmadd_sch(0, a, b, c);
19737 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19738 assert_eq_m128h(r, e);
19739 let r = _mm_maskz_fmadd_sch(1, a, b, c);
19740 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19741 assert_eq_m128h(r, e);
19742 }
19743
19744 #[simd_test(enable = "avx512fp16")]
19745 unsafe fn test_mm_fmadd_round_sch() {
19746 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19747 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19748 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19749 let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19750 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19751 assert_eq_m128h(r, e);
19752 }
19753
19754 #[simd_test(enable = "avx512fp16")]
19755 unsafe fn test_mm_mask_fmadd_round_sch() {
19756 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19757 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19758 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19759 let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19760 a, 0, b, c,
19761 );
19762 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19763 assert_eq_m128h(r, e);
19764 let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19765 a, 1, b, c,
19766 );
19767 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19768 assert_eq_m128h(r, e);
19769 }
19770
19771 #[simd_test(enable = "avx512fp16")]
19772 unsafe fn test_mm_mask3_fmadd_round_sch() {
19773 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19774 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19775 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19776 let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19777 a, b, c, 0,
19778 );
19779 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19780 assert_eq_m128h(r, e);
19781 let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19782 a, b, c, 1,
19783 );
19784 let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19785 assert_eq_m128h(r, e);
19786 }
19787
19788 #[simd_test(enable = "avx512fp16")]
19789 unsafe fn test_mm_maskz_fmadd_round_sch() {
19790 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19791 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
19792 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
19793 let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19794 0, a, b, c,
19795 );
19796 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19797 assert_eq_m128h(r, e);
19798 let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19799 1, a, b, c,
19800 );
19801 let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
19802 assert_eq_m128h(r, e);
19803 }
19804
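    // fcmadd_pch multiplies a by the complex conjugate of b and adds c:
    // (0 + 1i) * conj(0 + 2i) + (0 + 3i) = (0 + 1i) * (0 - 2i) + (0 + 3i) = 2 + 3i.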
19805 #[simd_test(enable = "avx512fp16,avx512vl")]
19806 unsafe fn test_mm_fcmadd_pch() {
19807 let a = _mm_set1_pch(0.0, 1.0);
19808 let b = _mm_set1_pch(0.0, 2.0);
19809 let c = _mm_set1_pch(0.0, 3.0);
19810 let r = _mm_fcmadd_pch(a, b, c);
19811 let e = _mm_set1_pch(2.0, 3.0);
19812 assert_eq_m128h(r, e);
19813 }
19814
19815 #[simd_test(enable = "avx512fp16,avx512vl")]
19816 unsafe fn test_mm_mask_fcmadd_pch() {
19817 let a = _mm_set1_pch(0.0, 1.0);
19818 let b = _mm_set1_pch(0.0, 2.0);
19819 let c = _mm_set1_pch(0.0, 3.0);
19820 let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
19821 let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
19822 assert_eq_m128h(r, e);
19823 }
19824
19825 #[simd_test(enable = "avx512fp16,avx512vl")]
19826 unsafe fn test_mm_mask3_fcmadd_pch() {
19827 let a = _mm_set1_pch(0.0, 1.0);
19828 let b = _mm_set1_pch(0.0, 2.0);
19829 let c = _mm_set1_pch(0.0, 3.0);
19830 let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
19831 let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
19832 assert_eq_m128h(r, e);
19833 }
19834
19835 #[simd_test(enable = "avx512fp16,avx512vl")]
19836 unsafe fn test_mm_maskz_fcmadd_pch() {
19837 let a = _mm_set1_pch(0.0, 1.0);
19838 let b = _mm_set1_pch(0.0, 2.0);
19839 let c = _mm_set1_pch(0.0, 3.0);
19840 let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
19841 let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
19842 assert_eq_m128h(r, e);
19843 }
19844
19845 #[simd_test(enable = "avx512fp16,avx512vl")]
19846 unsafe fn test_mm256_fcmadd_pch() {
19847 let a = _mm256_set1_pch(0.0, 1.0);
19848 let b = _mm256_set1_pch(0.0, 2.0);
19849 let c = _mm256_set1_pch(0.0, 3.0);
19850 let r = _mm256_fcmadd_pch(a, b, c);
19851 let e = _mm256_set1_pch(2.0, 3.0);
19852 assert_eq_m256h(r, e);
19853 }
19854
19855 #[simd_test(enable = "avx512fp16,avx512vl")]
19856 unsafe fn test_mm256_mask_fcmadd_pch() {
19857 let a = _mm256_set1_pch(0.0, 1.0);
19858 let b = _mm256_set1_pch(0.0, 2.0);
19859 let c = _mm256_set1_pch(0.0, 3.0);
19860 let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
19861 let e = _mm256_setr_ph(
19862 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
19863 );
19864 assert_eq_m256h(r, e);
19865 }
19866
19867 #[simd_test(enable = "avx512fp16,avx512vl")]
19868 unsafe fn test_mm256_mask3_fcmadd_pch() {
19869 let a = _mm256_set1_pch(0.0, 1.0);
19870 let b = _mm256_set1_pch(0.0, 2.0);
19871 let c = _mm256_set1_pch(0.0, 3.0);
19872 let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
19873 let e = _mm256_setr_ph(
19874 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
19875 );
19876 assert_eq_m256h(r, e);
19877 }
19878
19879 #[simd_test(enable = "avx512fp16,avx512vl")]
19880 unsafe fn test_mm256_maskz_fcmadd_pch() {
19881 let a = _mm256_set1_pch(0.0, 1.0);
19882 let b = _mm256_set1_pch(0.0, 2.0);
19883 let c = _mm256_set1_pch(0.0, 3.0);
19884 let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
19885 let e = _mm256_setr_ph(
19886 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
19887 );
19888 assert_eq_m256h(r, e);
19889 }
19890
19891 #[simd_test(enable = "avx512fp16")]
19892 unsafe fn test_mm512_fcmadd_pch() {
19893 let a = _mm512_set1_pch(0.0, 1.0);
19894 let b = _mm512_set1_pch(0.0, 2.0);
19895 let c = _mm512_set1_pch(0.0, 3.0);
19896 let r = _mm512_fcmadd_pch(a, b, c);
19897 let e = _mm512_set1_pch(2.0, 3.0);
19898 assert_eq_m512h(r, e);
19899 }
19900
19901 #[simd_test(enable = "avx512fp16")]
19902 unsafe fn test_mm512_mask_fcmadd_pch() {
19903 let a = _mm512_set1_pch(0.0, 1.0);
19904 let b = _mm512_set1_pch(0.0, 2.0);
19905 let c = _mm512_set1_pch(0.0, 3.0);
19906 let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
19907 let e = _mm512_setr_ph(
19908 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
19909 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
19910 );
19911 assert_eq_m512h(r, e);
19912 }
19913
19914 #[simd_test(enable = "avx512fp16")]
19915 unsafe fn test_mm512_mask3_fcmadd_pch() {
19916 let a = _mm512_set1_pch(0.0, 1.0);
19917 let b = _mm512_set1_pch(0.0, 2.0);
19918 let c = _mm512_set1_pch(0.0, 3.0);
19919 let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
19920 let e = _mm512_setr_ph(
19921 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
19922 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
19923 );
19924 assert_eq_m512h(r, e);
19925 }
19926
19927 #[simd_test(enable = "avx512fp16")]
19928 unsafe fn test_mm512_maskz_fcmadd_pch() {
19929 let a = _mm512_set1_pch(0.0, 1.0);
19930 let b = _mm512_set1_pch(0.0, 2.0);
19931 let c = _mm512_set1_pch(0.0, 3.0);
19932 let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
19933 let e = _mm512_setr_ph(
19934 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
19935 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
19936 );
19937 assert_eq_m512h(r, e);
19938 }
19939
19940 #[simd_test(enable = "avx512fp16")]
19941 unsafe fn test_mm512_fcmadd_round_pch() {
19942 let a = _mm512_set1_pch(0.0, 1.0);
19943 let b = _mm512_set1_pch(0.0, 2.0);
19944 let c = _mm512_set1_pch(0.0, 3.0);
19945 let r =
19946 _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
19947 let e = _mm512_set1_pch(2.0, 3.0);
19948 assert_eq_m512h(r, e);
19949 }
19950
19951 #[simd_test(enable = "avx512fp16")]
19952 unsafe fn test_mm512_mask_fcmadd_round_pch() {
19953 let a = _mm512_set1_pch(0.0, 1.0);
19954 let b = _mm512_set1_pch(0.0, 2.0);
19955 let c = _mm512_set1_pch(0.0, 3.0);
19956 let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19957 a,
19958 0b0101010101010101,
19959 b,
19960 c,
19961 );
19962 let e = _mm512_setr_ph(
19963 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
19964 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
19965 );
19966 assert_eq_m512h(r, e);
19967 }
19968
19969 #[simd_test(enable = "avx512fp16")]
19970 unsafe fn test_mm512_mask3_fcmadd_round_pch() {
19971 let a = _mm512_set1_pch(0.0, 1.0);
19972 let b = _mm512_set1_pch(0.0, 2.0);
19973 let c = _mm512_set1_pch(0.0, 3.0);
19974 let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19975 a,
19976 b,
19977 c,
19978 0b0101010101010101,
19979 );
19980 let e = _mm512_setr_ph(
19981 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
19982 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
19983 );
19984 assert_eq_m512h(r, e);
19985 }
19986
19987 #[simd_test(enable = "avx512fp16")]
19988 unsafe fn test_mm512_maskz_fcmadd_round_pch() {
19989 let a = _mm512_set1_pch(0.0, 1.0);
19990 let b = _mm512_set1_pch(0.0, 2.0);
19991 let c = _mm512_set1_pch(0.0, 3.0);
19992 let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
19993 0b0101010101010101,
19994 a,
19995 b,
19996 c,
19997 );
19998 let e = _mm512_setr_ph(
19999 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
20000 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
20001 );
20002 assert_eq_m512h(r, e);
20003 }
20004
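    // Scalar conjugate fused multiply-add on the lowest complex pair; masking behaves
    // like the fmadd_sch tests above.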
20005 #[simd_test(enable = "avx512fp16")]
20006 unsafe fn test_mm_fcmadd_sch() {
20007 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20008 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20009 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20010 let r = _mm_fcmadd_sch(a, b, c);
20011 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20012 assert_eq_m128h(r, e);
20013 }
20014
20015 #[simd_test(enable = "avx512fp16")]
20016 unsafe fn test_mm_mask_fcmadd_sch() {
20017 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20018 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20019 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20020 let r = _mm_mask_fcmadd_sch(a, 0, b, c);
20021 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20022 assert_eq_m128h(r, e);
20023 let r = _mm_mask_fcmadd_sch(a, 1, b, c);
20024 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20025 assert_eq_m128h(r, e);
20026 }
20027
20028 #[simd_test(enable = "avx512fp16")]
20029 unsafe fn test_mm_mask3_fcmadd_sch() {
20030 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20031 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20032 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20033 let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
20034 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20035 assert_eq_m128h(r, e);
20036 let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
20037 let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20038 assert_eq_m128h(r, e);
20039 }
20040
20041 #[simd_test(enable = "avx512fp16")]
20042 unsafe fn test_mm_maskz_fcmadd_sch() {
20043 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20044 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20045 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20046 let r = _mm_maskz_fcmadd_sch(0, a, b, c);
20047 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20048 assert_eq_m128h(r, e);
20049 let r = _mm_maskz_fcmadd_sch(1, a, b, c);
20050 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20051 assert_eq_m128h(r, e);
20052 }
20053
20054 #[simd_test(enable = "avx512fp16")]
20055 unsafe fn test_mm_fcmadd_round_sch() {
20056 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20057 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20058 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20059 let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20060 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20061 assert_eq_m128h(r, e);
20062 }
20063
20064 #[simd_test(enable = "avx512fp16")]
20065 unsafe fn test_mm_mask_fcmadd_round_sch() {
20066 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20067 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20068 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20069 let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20070 a, 0, b, c,
20071 );
20072 let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20073 assert_eq_m128h(r, e);
20074 let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20075 a, 1, b, c,
20076 );
20077 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20078 assert_eq_m128h(r, e);
20079 }
20080
20081 #[simd_test(enable = "avx512fp16")]
20082 unsafe fn test_mm_mask3_fcmadd_round_sch() {
20083 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20084 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20085 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20086 let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20087 a, b, c, 0,
20088 );
20089 let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20090 assert_eq_m128h(r, e);
20091 let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20092 a, b, c, 1,
20093 );
20094 let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20095 assert_eq_m128h(r, e);
20096 }
20097
20098 #[simd_test(enable = "avx512fp16")]
20099 unsafe fn test_mm_maskz_fcmadd_round_sch() {
20100 let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20101 let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
20102 let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
20103 let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20104 0, a, b, c,
20105 );
20106 let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20107 assert_eq_m128h(r, e);
20108 let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20109 1, a, b, c,
20110 );
20111 let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
20112 assert_eq_m128h(r, e);
20113 }
20114
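    // fmadd_ph is the ordinary per-element (non-complex) fused multiply-add:
    // 1.0 * 2.0 + 3.0 = 5.0. Mask bits operate per half-precision element.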
20115 #[simd_test(enable = "avx512fp16,avx512vl")]
20116 unsafe fn test_mm_fmadd_ph() {
20117 let a = _mm_set1_ph(1.0);
20118 let b = _mm_set1_ph(2.0);
20119 let c = _mm_set1_ph(3.0);
20120 let r = _mm_fmadd_ph(a, b, c);
20121 let e = _mm_set1_ph(5.0);
20122 assert_eq_m128h(r, e);
20123 }
20124
20125 #[simd_test(enable = "avx512fp16,avx512vl")]
20126 unsafe fn test_mm_mask_fmadd_ph() {
20127 let a = _mm_set1_ph(1.0);
20128 let b = _mm_set1_ph(2.0);
20129 let c = _mm_set1_ph(3.0);
20130 let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
20131 let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
20132 assert_eq_m128h(r, e);
20133 }
20134
20135 #[simd_test(enable = "avx512fp16,avx512vl")]
20136 unsafe fn test_mm_mask3_fmadd_ph() {
20137 let a = _mm_set1_ph(1.0);
20138 let b = _mm_set1_ph(2.0);
20139 let c = _mm_set1_ph(3.0);
20140 let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
20141 let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
20142 assert_eq_m128h(r, e);
20143 }
20144
20145 #[simd_test(enable = "avx512fp16,avx512vl")]
20146 unsafe fn test_mm_maskz_fmadd_ph() {
20147 let a = _mm_set1_ph(1.0);
20148 let b = _mm_set1_ph(2.0);
20149 let c = _mm_set1_ph(3.0);
20150 let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
20151 let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
20152 assert_eq_m128h(r, e);
20153 }
20154
20155 #[simd_test(enable = "avx512fp16,avx512vl")]
20156 unsafe fn test_mm256_fmadd_ph() {
20157 let a = _mm256_set1_ph(1.0);
20158 let b = _mm256_set1_ph(2.0);
20159 let c = _mm256_set1_ph(3.0);
20160 let r = _mm256_fmadd_ph(a, b, c);
20161 let e = _mm256_set1_ph(5.0);
20162 assert_eq_m256h(r, e);
20163 }
20164
20165 #[simd_test(enable = "avx512fp16,avx512vl")]
20166 unsafe fn test_mm256_mask_fmadd_ph() {
20167 let a = _mm256_set1_ph(1.0);
20168 let b = _mm256_set1_ph(2.0);
20169 let c = _mm256_set1_ph(3.0);
20170 let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
20171 let e = _mm256_set_ph(
20172 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20173 );
20174 assert_eq_m256h(r, e);
20175 }
20176
20177 #[simd_test(enable = "avx512fp16,avx512vl")]
20178 unsafe fn test_mm256_mask3_fmadd_ph() {
20179 let a = _mm256_set1_ph(1.0);
20180 let b = _mm256_set1_ph(2.0);
20181 let c = _mm256_set1_ph(3.0);
20182 let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
20183 let e = _mm256_set_ph(
20184 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20185 );
20186 assert_eq_m256h(r, e);
20187 }
20188
20189 #[simd_test(enable = "avx512fp16,avx512vl")]
20190 unsafe fn test_mm256_maskz_fmadd_ph() {
20191 let a = _mm256_set1_ph(1.0);
20192 let b = _mm256_set1_ph(2.0);
20193 let c = _mm256_set1_ph(3.0);
20194 let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
20195 let e = _mm256_set_ph(
20196 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20197 );
20198 assert_eq_m256h(r, e);
20199 }
20200
20201 #[simd_test(enable = "avx512fp16")]
20202 unsafe fn test_mm512_fmadd_ph() {
20203 let a = _mm512_set1_ph(1.0);
20204 let b = _mm512_set1_ph(2.0);
20205 let c = _mm512_set1_ph(3.0);
20206 let r = _mm512_fmadd_ph(a, b, c);
20207 let e = _mm512_set1_ph(5.0);
20208 assert_eq_m512h(r, e);
20209 }
20210
20211 #[simd_test(enable = "avx512fp16")]
20212 unsafe fn test_mm512_mask_fmadd_ph() {
20213 let a = _mm512_set1_ph(1.0);
20214 let b = _mm512_set1_ph(2.0);
20215 let c = _mm512_set1_ph(3.0);
20216 let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
20217 let e = _mm512_set_ph(
20218 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
20219 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20220 );
20221 assert_eq_m512h(r, e);
20222 }
20223
20224 #[simd_test(enable = "avx512fp16")]
20225 unsafe fn test_mm512_mask3_fmadd_ph() {
20226 let a = _mm512_set1_ph(1.0);
20227 let b = _mm512_set1_ph(2.0);
20228 let c = _mm512_set1_ph(3.0);
20229 let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
20230 let e = _mm512_set_ph(
20231 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
20232 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20233 );
20234 assert_eq_m512h(r, e);
20235 }
20236
20237 #[simd_test(enable = "avx512fp16")]
20238 unsafe fn test_mm512_maskz_fmadd_ph() {
20239 let a = _mm512_set1_ph(1.0);
20240 let b = _mm512_set1_ph(2.0);
20241 let c = _mm512_set1_ph(3.0);
20242 let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
20243 let e = _mm512_set_ph(
20244 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
20245 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20246 );
20247 assert_eq_m512h(r, e);
20248 }
20249
20250 #[simd_test(enable = "avx512fp16")]
20251 unsafe fn test_mm512_fmadd_round_ph() {
20252 let a = _mm512_set1_ph(1.0);
20253 let b = _mm512_set1_ph(2.0);
20254 let c = _mm512_set1_ph(3.0);
20255 let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20256 let e = _mm512_set1_ph(5.0);
20257 assert_eq_m512h(r, e);
20258 }
20259
20260 #[simd_test(enable = "avx512fp16")]
20261 unsafe fn test_mm512_mask_fmadd_round_ph() {
20262 let a = _mm512_set1_ph(1.0);
20263 let b = _mm512_set1_ph(2.0);
20264 let c = _mm512_set1_ph(3.0);
20265 let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20266 a,
20267 0b01010101010101010101010101010101,
20268 b,
20269 c,
20270 );
20271 let e = _mm512_set_ph(
20272 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
20273 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
20274 );
20275 assert_eq_m512h(r, e);
20276 }
20277
20278 #[simd_test(enable = "avx512fp16")]
20279 unsafe fn test_mm512_mask3_fmadd_round_ph() {
20280 let a = _mm512_set1_ph(1.0);
20281 let b = _mm512_set1_ph(2.0);
20282 let c = _mm512_set1_ph(3.0);
20283 let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20284 a,
20285 b,
20286 c,
20287 0b01010101010101010101010101010101,
20288 );
20289 let e = _mm512_set_ph(
20290 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
20291 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
20292 );
20293 assert_eq_m512h(r, e);
20294 }
20295
20296 #[simd_test(enable = "avx512fp16")]
20297 unsafe fn test_mm512_maskz_fmadd_round_ph() {
20298 let a = _mm512_set1_ph(1.0);
20299 let b = _mm512_set1_ph(2.0);
20300 let c = _mm512_set1_ph(3.0);
20301 let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20302 0b01010101010101010101010101010101,
20303 a,
20304 b,
20305 c,
20306 );
20307 let e = _mm512_set_ph(
20308 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
20309 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
20310 );
20311 assert_eq_m512h(r, e);
20312 }
20313
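    // The scalar `_sh` variants operate on element 0 only (1.0 * 2.0 + 3.0 = 5.0);
    // the remaining seven elements are copied from `a`, except in the `_mask3_`
    // variants, where they are copied from `c`.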
20314 #[simd_test(enable = "avx512fp16")]
20315 unsafe fn test_mm_fmadd_sh() {
20316 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20317 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20318 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20319 let r = _mm_fmadd_sh(a, b, c);
20320 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20321 assert_eq_m128h(r, e);
20322 }
20323
20324 #[simd_test(enable = "avx512fp16")]
20325 unsafe fn test_mm_mask_fmadd_sh() {
20326 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20327 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20328 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20329 let r = _mm_mask_fmadd_sh(a, 0, b, c);
20330 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20331 assert_eq_m128h(r, e);
20332 let r = _mm_mask_fmadd_sh(a, 1, b, c);
20333 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20334 assert_eq_m128h(r, e);
20335 }
20336
20337 #[simd_test(enable = "avx512fp16")]
20338 unsafe fn test_mm_mask3_fmadd_sh() {
20339 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20340 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20341 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20342 let r = _mm_mask3_fmadd_sh(a, b, c, 0);
20343 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20344 assert_eq_m128h(r, e);
20345 let r = _mm_mask3_fmadd_sh(a, b, c, 1);
20346 let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
20347 assert_eq_m128h(r, e);
20348 }
20349
20350 #[simd_test(enable = "avx512fp16")]
20351 unsafe fn test_mm_maskz_fmadd_sh() {
20352 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20353 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20354 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20355 let r = _mm_maskz_fmadd_sh(0, a, b, c);
20356 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20357 assert_eq_m128h(r, e);
20358 let r = _mm_maskz_fmadd_sh(1, a, b, c);
20359 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20360 assert_eq_m128h(r, e);
20361 }
20362
20363 #[simd_test(enable = "avx512fp16")]
20364 unsafe fn test_mm_fmadd_round_sh() {
20365 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20366 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20367 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20368 let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20369 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20370 assert_eq_m128h(r, e);
20371 }
20372
20373 #[simd_test(enable = "avx512fp16")]
20374 unsafe fn test_mm_mask_fmadd_round_sh() {
20375 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20376 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20377 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20378 let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20379 a, 0, b, c,
20380 );
20381 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20382 assert_eq_m128h(r, e);
20383 let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20384 a, 1, b, c,
20385 );
20386 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20387 assert_eq_m128h(r, e);
20388 }
20389
20390 #[simd_test(enable = "avx512fp16")]
20391 unsafe fn test_mm_mask3_fmadd_round_sh() {
20392 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20393 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20394 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20395 let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20396 a, b, c, 0,
20397 );
20398 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20399 assert_eq_m128h(r, e);
20400 let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20401 a, b, c, 1,
20402 );
20403 let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
20404 assert_eq_m128h(r, e);
20405 }
20406
20407 #[simd_test(enable = "avx512fp16")]
20408 unsafe fn test_mm_maskz_fmadd_round_sh() {
20409 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20410 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20411 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20412 let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20413 0, a, b, c,
20414 );
20415 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20416 assert_eq_m128h(r, e);
20417 let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20418 1, a, b, c,
20419 );
20420 let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
20421 assert_eq_m128h(r, e);
20422 }
20423
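    // fmsub computes `a * b - c`, so every selected lane is 1.0 * 2.0 - 3.0 = -1.0.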
20424 #[simd_test(enable = "avx512fp16,avx512vl")]
20425 unsafe fn test_mm_fmsub_ph() {
20426 let a = _mm_set1_ph(1.0);
20427 let b = _mm_set1_ph(2.0);
20428 let c = _mm_set1_ph(3.0);
20429 let r = _mm_fmsub_ph(a, b, c);
20430 let e = _mm_set1_ph(-1.0);
20431 assert_eq_m128h(r, e);
20432 }
20433
20434 #[simd_test(enable = "avx512fp16,avx512vl")]
20435 unsafe fn test_mm_mask_fmsub_ph() {
20436 let a = _mm_set1_ph(1.0);
20437 let b = _mm_set1_ph(2.0);
20438 let c = _mm_set1_ph(3.0);
20439 let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
20440 let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
20441 assert_eq_m128h(r, e);
20442 }
20443
20444 #[simd_test(enable = "avx512fp16,avx512vl")]
20445 unsafe fn test_mm_mask3_fmsub_ph() {
20446 let a = _mm_set1_ph(1.0);
20447 let b = _mm_set1_ph(2.0);
20448 let c = _mm_set1_ph(3.0);
20449 let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
20450 let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
20451 assert_eq_m128h(r, e);
20452 }
20453
20454 #[simd_test(enable = "avx512fp16,avx512vl")]
20455 unsafe fn test_mm_maskz_fmsub_ph() {
20456 let a = _mm_set1_ph(1.0);
20457 let b = _mm_set1_ph(2.0);
20458 let c = _mm_set1_ph(3.0);
20459 let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
20460 let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
20461 assert_eq_m128h(r, e);
20462 }
20463
20464 #[simd_test(enable = "avx512fp16,avx512vl")]
20465 unsafe fn test_mm256_fmsub_ph() {
20466 let a = _mm256_set1_ph(1.0);
20467 let b = _mm256_set1_ph(2.0);
20468 let c = _mm256_set1_ph(3.0);
20469 let r = _mm256_fmsub_ph(a, b, c);
20470 let e = _mm256_set1_ph(-1.0);
20471 assert_eq_m256h(r, e);
20472 }
20473
20474 #[simd_test(enable = "avx512fp16,avx512vl")]
20475 unsafe fn test_mm256_mask_fmsub_ph() {
20476 let a = _mm256_set1_ph(1.0);
20477 let b = _mm256_set1_ph(2.0);
20478 let c = _mm256_set1_ph(3.0);
20479 let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
20480 let e = _mm256_set_ph(
20481 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20482 );
20483 assert_eq_m256h(r, e);
20484 }
20485
20486 #[simd_test(enable = "avx512fp16,avx512vl")]
20487 unsafe fn test_mm256_mask3_fmsub_ph() {
20488 let a = _mm256_set1_ph(1.0);
20489 let b = _mm256_set1_ph(2.0);
20490 let c = _mm256_set1_ph(3.0);
20491 let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
20492 let e = _mm256_set_ph(
20493 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20494 );
20495 assert_eq_m256h(r, e);
20496 }
20497
20498 #[simd_test(enable = "avx512fp16,avx512vl")]
20499 unsafe fn test_mm256_maskz_fmsub_ph() {
20500 let a = _mm256_set1_ph(1.0);
20501 let b = _mm256_set1_ph(2.0);
20502 let c = _mm256_set1_ph(3.0);
20503 let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
20504 let e = _mm256_set_ph(
20505 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20506 );
20507 assert_eq_m256h(r, e);
20508 }
20509
20510 #[simd_test(enable = "avx512fp16")]
20511 unsafe fn test_mm512_fmsub_ph() {
20512 let a = _mm512_set1_ph(1.0);
20513 let b = _mm512_set1_ph(2.0);
20514 let c = _mm512_set1_ph(3.0);
20515 let r = _mm512_fmsub_ph(a, b, c);
20516 let e = _mm512_set1_ph(-1.0);
20517 assert_eq_m512h(r, e);
20518 }
20519
20520 #[simd_test(enable = "avx512fp16")]
20521 unsafe fn test_mm512_mask_fmsub_ph() {
20522 let a = _mm512_set1_ph(1.0);
20523 let b = _mm512_set1_ph(2.0);
20524 let c = _mm512_set1_ph(3.0);
20525 let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
20526 let e = _mm512_set_ph(
20527 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20528 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20529 );
20530 assert_eq_m512h(r, e);
20531 }
20532
20533 #[simd_test(enable = "avx512fp16")]
20534 unsafe fn test_mm512_mask3_fmsub_ph() {
20535 let a = _mm512_set1_ph(1.0);
20536 let b = _mm512_set1_ph(2.0);
20537 let c = _mm512_set1_ph(3.0);
20538 let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
20539 let e = _mm512_set_ph(
20540 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20541 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20542 );
20543 assert_eq_m512h(r, e);
20544 }
20545
20546 #[simd_test(enable = "avx512fp16")]
20547 unsafe fn test_mm512_maskz_fmsub_ph() {
20548 let a = _mm512_set1_ph(1.0);
20549 let b = _mm512_set1_ph(2.0);
20550 let c = _mm512_set1_ph(3.0);
20551 let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
20552 let e = _mm512_set_ph(
20553 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20554 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20555 );
20556 assert_eq_m512h(r, e);
20557 }
20558
20559 #[simd_test(enable = "avx512fp16")]
20560 unsafe fn test_mm512_fmsub_round_ph() {
20561 let a = _mm512_set1_ph(1.0);
20562 let b = _mm512_set1_ph(2.0);
20563 let c = _mm512_set1_ph(3.0);
20564 let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20565 let e = _mm512_set1_ph(-1.0);
20566 assert_eq_m512h(r, e);
20567 }
20568
20569 #[simd_test(enable = "avx512fp16")]
20570 unsafe fn test_mm512_mask_fmsub_round_ph() {
20571 let a = _mm512_set1_ph(1.0);
20572 let b = _mm512_set1_ph(2.0);
20573 let c = _mm512_set1_ph(3.0);
20574 let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20575 a,
20576 0b01010101010101010101010101010101,
20577 b,
20578 c,
20579 );
20580 let e = _mm512_set_ph(
20581 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20582 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
20583 );
20584 assert_eq_m512h(r, e);
20585 }
20586
20587 #[simd_test(enable = "avx512fp16")]
20588 unsafe fn test_mm512_mask3_fmsub_round_ph() {
20589 let a = _mm512_set1_ph(1.0);
20590 let b = _mm512_set1_ph(2.0);
20591 let c = _mm512_set1_ph(3.0);
20592 let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20593 a,
20594 b,
20595 c,
20596 0b01010101010101010101010101010101,
20597 );
20598 let e = _mm512_set_ph(
20599 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20600 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
20601 );
20602 assert_eq_m512h(r, e);
20603 }
20604
20605 #[simd_test(enable = "avx512fp16")]
20606 unsafe fn test_mm512_maskz_fmsub_round_ph() {
20607 let a = _mm512_set1_ph(1.0);
20608 let b = _mm512_set1_ph(2.0);
20609 let c = _mm512_set1_ph(3.0);
20610 let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20611 0b01010101010101010101010101010101,
20612 a,
20613 b,
20614 c,
20615 );
20616 let e = _mm512_set_ph(
20617 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20618 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
20619 );
20620 assert_eq_m512h(r, e);
20621 }
20622
20623 #[simd_test(enable = "avx512fp16")]
20624 unsafe fn test_mm_fmsub_sh() {
20625 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20626 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20627 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20628 let r = _mm_fmsub_sh(a, b, c);
20629 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20630 assert_eq_m128h(r, e);
20631 }
20632
20633 #[simd_test(enable = "avx512fp16")]
20634 unsafe fn test_mm_mask_fmsub_sh() {
20635 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20636 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20637 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20638 let r = _mm_mask_fmsub_sh(a, 0, b, c);
20639 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20640 assert_eq_m128h(r, e);
20641 let r = _mm_mask_fmsub_sh(a, 1, b, c);
20642 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20643 assert_eq_m128h(r, e);
20644 }
20645
20646 #[simd_test(enable = "avx512fp16")]
20647 unsafe fn test_mm_mask3_fmsub_sh() {
20648 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20649 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20650 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20651 let r = _mm_mask3_fmsub_sh(a, b, c, 0);
20652 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20653 assert_eq_m128h(r, e);
20654 let r = _mm_mask3_fmsub_sh(a, b, c, 1);
20655 let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
20656 assert_eq_m128h(r, e);
20657 }
20658
20659 #[simd_test(enable = "avx512fp16")]
20660 unsafe fn test_mm_maskz_fmsub_sh() {
20661 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20662 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20663 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20664 let r = _mm_maskz_fmsub_sh(0, a, b, c);
20665 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20666 assert_eq_m128h(r, e);
20667 let r = _mm_maskz_fmsub_sh(1, a, b, c);
20668 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20669 assert_eq_m128h(r, e);
20670 }
20671
20672 #[simd_test(enable = "avx512fp16")]
20673 unsafe fn test_mm_fmsub_round_sh() {
20674 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20675 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20676 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20677 let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20678 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20679 assert_eq_m128h(r, e);
20680 }
20681
20682 #[simd_test(enable = "avx512fp16")]
20683 unsafe fn test_mm_mask_fmsub_round_sh() {
20684 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20685 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20686 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20687 let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20688 a, 0, b, c,
20689 );
20690 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20691 assert_eq_m128h(r, e);
20692 let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20693 a, 1, b, c,
20694 );
20695 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20696 assert_eq_m128h(r, e);
20697 }
20698
20699 #[simd_test(enable = "avx512fp16")]
20700 unsafe fn test_mm_mask3_fmsub_round_sh() {
20701 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20702 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20703 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20704 let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20705 a, b, c, 0,
20706 );
20707 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20708 assert_eq_m128h(r, e);
20709 let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20710 a, b, c, 1,
20711 );
20712 let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
20713 assert_eq_m128h(r, e);
20714 }
20715
20716 #[simd_test(enable = "avx512fp16")]
20717 unsafe fn test_mm_maskz_fmsub_round_sh() {
20718 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20719 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20720 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20721 let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20722 0, a, b, c,
20723 );
20724 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20725 assert_eq_m128h(r, e);
20726 let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20727 1, a, b, c,
20728 );
20729 let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
20730 assert_eq_m128h(r, e);
20731 }
20732
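    // fnmadd computes `-(a * b) + c` = -(1.0 * 2.0) + 3.0 = 1.0. Note that this
    // happens to equal the value of `a`, so in the `_mask_` tests the selected and
    // unselected lanes are both 1.0.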
20733 #[simd_test(enable = "avx512fp16,avx512vl")]
20734 unsafe fn test_mm_fnmadd_ph() {
20735 let a = _mm_set1_ph(1.0);
20736 let b = _mm_set1_ph(2.0);
20737 let c = _mm_set1_ph(3.0);
20738 let r = _mm_fnmadd_ph(a, b, c);
20739 let e = _mm_set1_ph(1.0);
20740 assert_eq_m128h(r, e);
20741 }
20742
20743 #[simd_test(enable = "avx512fp16,avx512vl")]
20744 unsafe fn test_mm_mask_fnmadd_ph() {
20745 let a = _mm_set1_ph(1.0);
20746 let b = _mm_set1_ph(2.0);
20747 let c = _mm_set1_ph(3.0);
20748 let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
20749 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
20750 assert_eq_m128h(r, e);
20751 }
20752
20753 #[simd_test(enable = "avx512fp16,avx512vl")]
20754 unsafe fn test_mm_mask3_fnmadd_ph() {
20755 let a = _mm_set1_ph(1.0);
20756 let b = _mm_set1_ph(2.0);
20757 let c = _mm_set1_ph(3.0);
20758 let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
20759 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
20760 assert_eq_m128h(r, e);
20761 }
20762
20763 #[simd_test(enable = "avx512fp16,avx512vl")]
20764 unsafe fn test_mm_maskz_fnmadd_ph() {
20765 let a = _mm_set1_ph(1.0);
20766 let b = _mm_set1_ph(2.0);
20767 let c = _mm_set1_ph(3.0);
20768 let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
20769 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
20770 assert_eq_m128h(r, e);
20771 }
20772
20773 #[simd_test(enable = "avx512fp16,avx512vl")]
20774 unsafe fn test_mm256_fnmadd_ph() {
20775 let a = _mm256_set1_ph(1.0);
20776 let b = _mm256_set1_ph(2.0);
20777 let c = _mm256_set1_ph(3.0);
20778 let r = _mm256_fnmadd_ph(a, b, c);
20779 let e = _mm256_set1_ph(1.0);
20780 assert_eq_m256h(r, e);
20781 }
20782
20783 #[simd_test(enable = "avx512fp16,avx512vl")]
20784 unsafe fn test_mm256_mask_fnmadd_ph() {
20785 let a = _mm256_set1_ph(1.0);
20786 let b = _mm256_set1_ph(2.0);
20787 let c = _mm256_set1_ph(3.0);
20788 let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
20789 let e = _mm256_set_ph(
20790 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20791 );
20792 assert_eq_m256h(r, e);
20793 }
20794
20795 #[simd_test(enable = "avx512fp16,avx512vl")]
20796 unsafe fn test_mm256_mask3_fnmadd_ph() {
20797 let a = _mm256_set1_ph(1.0);
20798 let b = _mm256_set1_ph(2.0);
20799 let c = _mm256_set1_ph(3.0);
20800 let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
20801 let e = _mm256_set_ph(
20802 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20803 );
20804 assert_eq_m256h(r, e);
20805 }
20806
20807 #[simd_test(enable = "avx512fp16,avx512vl")]
20808 unsafe fn test_mm256_maskz_fnmadd_ph() {
20809 let a = _mm256_set1_ph(1.0);
20810 let b = _mm256_set1_ph(2.0);
20811 let c = _mm256_set1_ph(3.0);
20812 let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
20813 let e = _mm256_set_ph(
20814 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20815 );
20816 assert_eq_m256h(r, e);
20817 }
20818
20819 #[simd_test(enable = "avx512fp16")]
20820 unsafe fn test_mm512_fnmadd_ph() {
20821 let a = _mm512_set1_ph(1.0);
20822 let b = _mm512_set1_ph(2.0);
20823 let c = _mm512_set1_ph(3.0);
20824 let r = _mm512_fnmadd_ph(a, b, c);
20825 let e = _mm512_set1_ph(1.0);
20826 assert_eq_m512h(r, e);
20827 }
20828
20829 #[simd_test(enable = "avx512fp16")]
20830 unsafe fn test_mm512_mask_fnmadd_ph() {
20831 let a = _mm512_set1_ph(1.0);
20832 let b = _mm512_set1_ph(2.0);
20833 let c = _mm512_set1_ph(3.0);
20834 let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
20835 let e = _mm512_set_ph(
20836 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20837 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20838 );
20839 assert_eq_m512h(r, e);
20840 }
20841
20842 #[simd_test(enable = "avx512fp16")]
20843 unsafe fn test_mm512_mask3_fnmadd_ph() {
20844 let a = _mm512_set1_ph(1.0);
20845 let b = _mm512_set1_ph(2.0);
20846 let c = _mm512_set1_ph(3.0);
20847 let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
20848 let e = _mm512_set_ph(
20849 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
20850 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20851 );
20852 assert_eq_m512h(r, e);
20853 }
20854
20855 #[simd_test(enable = "avx512fp16")]
20856 unsafe fn test_mm512_maskz_fnmadd_ph() {
20857 let a = _mm512_set1_ph(1.0);
20858 let b = _mm512_set1_ph(2.0);
20859 let c = _mm512_set1_ph(3.0);
20860 let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
20861 let e = _mm512_set_ph(
20862 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
20863 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20864 );
20865 assert_eq_m512h(r, e);
20866 }
20867
20868 #[simd_test(enable = "avx512fp16")]
20869 unsafe fn test_mm512_fnmadd_round_ph() {
20870 let a = _mm512_set1_ph(1.0);
20871 let b = _mm512_set1_ph(2.0);
20872 let c = _mm512_set1_ph(3.0);
20873 let r =
20874 _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20875 let e = _mm512_set1_ph(1.0);
20876 assert_eq_m512h(r, e);
20877 }
20878
20879 #[simd_test(enable = "avx512fp16")]
20880 unsafe fn test_mm512_mask_fnmadd_round_ph() {
20881 let a = _mm512_set1_ph(1.0);
20882 let b = _mm512_set1_ph(2.0);
20883 let c = _mm512_set1_ph(3.0);
20884 let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20885 a,
20886 0b01010101010101010101010101010101,
20887 b,
20888 c,
20889 );
20890 let e = _mm512_set_ph(
20891 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20892 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20893 );
20894 assert_eq_m512h(r, e);
20895 }
20896
20897 #[simd_test(enable = "avx512fp16")]
20898 unsafe fn test_mm512_mask3_fnmadd_round_ph() {
20899 let a = _mm512_set1_ph(1.0);
20900 let b = _mm512_set1_ph(2.0);
20901 let c = _mm512_set1_ph(3.0);
20902 let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20903 a,
20904 b,
20905 c,
20906 0b01010101010101010101010101010101,
20907 );
20908 let e = _mm512_set_ph(
20909 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
20910 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20911 );
20912 assert_eq_m512h(r, e);
20913 }
20914
20915 #[simd_test(enable = "avx512fp16")]
20916 unsafe fn test_mm512_maskz_fnmadd_round_ph() {
20917 let a = _mm512_set1_ph(1.0);
20918 let b = _mm512_set1_ph(2.0);
20919 let c = _mm512_set1_ph(3.0);
20920 let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20921 0b01010101010101010101010101010101,
20922 a,
20923 b,
20924 c,
20925 );
20926 let e = _mm512_set_ph(
20927 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
20928 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20929 );
20930 assert_eq_m512h(r, e);
20931 }
20932
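    // For the scalar fnmadd tests, the computed value -(1.0 * 2.0) + 3.0 = 1.0 equals
    // element 0 of `a`, which is why the masked-off and masked-on expectations below
    // are identical.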
20933 #[simd_test(enable = "avx512fp16")]
20934 unsafe fn test_mm_fnmadd_sh() {
20935 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20936 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20937 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20938 let r = _mm_fnmadd_sh(a, b, c);
20939 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20940 assert_eq_m128h(r, e);
20941 }
20942
20943 #[simd_test(enable = "avx512fp16")]
20944 unsafe fn test_mm_mask_fnmadd_sh() {
20945 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20946 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20947 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20948 let r = _mm_mask_fnmadd_sh(a, 0, b, c);
20949 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20950 assert_eq_m128h(r, e);
20951 let r = _mm_mask_fnmadd_sh(a, 1, b, c);
20952 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20953 assert_eq_m128h(r, e);
20954 }
20955
20956 #[simd_test(enable = "avx512fp16")]
20957 unsafe fn test_mm_mask3_fnmadd_sh() {
20958 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20959 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20960 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20961 let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
20962 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20963 assert_eq_m128h(r, e);
20964 let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
20965 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
20966 assert_eq_m128h(r, e);
20967 }
20968
20969 #[simd_test(enable = "avx512fp16")]
20970 unsafe fn test_mm_maskz_fnmadd_sh() {
20971 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20972 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20973 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20974 let r = _mm_maskz_fnmadd_sh(0, a, b, c);
20975 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
20976 assert_eq_m128h(r, e);
20977 let r = _mm_maskz_fnmadd_sh(1, a, b, c);
20978 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20979 assert_eq_m128h(r, e);
20980 }
20981
20982 #[simd_test(enable = "avx512fp16")]
20983 unsafe fn test_mm_fnmadd_round_sh() {
20984 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20985 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20986 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20987 let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20988 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20989 assert_eq_m128h(r, e);
20990 }
20991
20992 #[simd_test(enable = "avx512fp16")]
20993 unsafe fn test_mm_mask_fnmadd_round_sh() {
20994 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
20995 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
20996 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
20997 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20998 a, 0, b, c,
20999 );
21000 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21001 assert_eq_m128h(r, e);
21002 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21003 a, 1, b, c,
21004 );
21005 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21006 assert_eq_m128h(r, e);
21007 }
21008
21009 #[simd_test(enable = "avx512fp16")]
21010 unsafe fn test_mm_mask3_fnmadd_round_sh() {
21011 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21012 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21013 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21014 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21015 a, b, c, 0,
21016 );
21017 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21018 assert_eq_m128h(r, e);
21019 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21020 a, b, c, 1,
21021 );
21022 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
21023 assert_eq_m128h(r, e);
21024 }
21025
21026 #[simd_test(enable = "avx512fp16")]
21027 unsafe fn test_mm_maskz_fnmadd_round_sh() {
21028 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21029 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21030 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21031 let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21032 0, a, b, c,
21033 );
21034 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21035 assert_eq_m128h(r, e);
21036 let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21037 1, a, b, c,
21038 );
21039 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21040 assert_eq_m128h(r, e);
21041 }
21042
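    // fnmsub computes `-(a * b) - c` = -(1.0 * 2.0) - 3.0 = -5.0 in every selected lane.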
21043 #[simd_test(enable = "avx512fp16,avx512vl")]
21044 unsafe fn test_mm_fnmsub_ph() {
21045 let a = _mm_set1_ph(1.0);
21046 let b = _mm_set1_ph(2.0);
21047 let c = _mm_set1_ph(3.0);
21048 let r = _mm_fnmsub_ph(a, b, c);
21049 let e = _mm_set1_ph(-5.0);
21050 assert_eq_m128h(r, e);
21051 }
21052
21053 #[simd_test(enable = "avx512fp16,avx512vl")]
21054 unsafe fn test_mm_mask_fnmsub_ph() {
21055 let a = _mm_set1_ph(1.0);
21056 let b = _mm_set1_ph(2.0);
21057 let c = _mm_set1_ph(3.0);
21058 let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
21059 let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
21060 assert_eq_m128h(r, e);
21061 }
21062
21063 #[simd_test(enable = "avx512fp16,avx512vl")]
21064 unsafe fn test_mm_mask3_fnmsub_ph() {
21065 let a = _mm_set1_ph(1.0);
21066 let b = _mm_set1_ph(2.0);
21067 let c = _mm_set1_ph(3.0);
21068 let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
21069 let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
21070 assert_eq_m128h(r, e);
21071 }
21072
21073 #[simd_test(enable = "avx512fp16,avx512vl")]
21074 unsafe fn test_mm_maskz_fnmsub_ph() {
21075 let a = _mm_set1_ph(1.0);
21076 let b = _mm_set1_ph(2.0);
21077 let c = _mm_set1_ph(3.0);
21078 let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
21079 let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
21080 assert_eq_m128h(r, e);
21081 }
21082
21083 #[simd_test(enable = "avx512fp16,avx512vl")]
21084 unsafe fn test_mm256_fnmsub_ph() {
21085 let a = _mm256_set1_ph(1.0);
21086 let b = _mm256_set1_ph(2.0);
21087 let c = _mm256_set1_ph(3.0);
21088 let r = _mm256_fnmsub_ph(a, b, c);
21089 let e = _mm256_set1_ph(-5.0);
21090 assert_eq_m256h(r, e);
21091 }
21092
21093 #[simd_test(enable = "avx512fp16,avx512vl")]
21094 unsafe fn test_mm256_mask_fnmsub_ph() {
21095 let a = _mm256_set1_ph(1.0);
21096 let b = _mm256_set1_ph(2.0);
21097 let c = _mm256_set1_ph(3.0);
21098 let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
21099 let e = _mm256_set_ph(
21100 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21101 );
21102 assert_eq_m256h(r, e);
21103 }
21104
21105 #[simd_test(enable = "avx512fp16,avx512vl")]
21106 unsafe fn test_mm256_mask3_fnmsub_ph() {
21107 let a = _mm256_set1_ph(1.0);
21108 let b = _mm256_set1_ph(2.0);
21109 let c = _mm256_set1_ph(3.0);
21110 let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
21111 let e = _mm256_set_ph(
21112 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21113 );
21114 assert_eq_m256h(r, e);
21115 }
21116
21117 #[simd_test(enable = "avx512fp16,avx512vl")]
21118 unsafe fn test_mm256_maskz_fnmsub_ph() {
21119 let a = _mm256_set1_ph(1.0);
21120 let b = _mm256_set1_ph(2.0);
21121 let c = _mm256_set1_ph(3.0);
21122 let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
21123 let e = _mm256_set_ph(
21124 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21125 );
21126 assert_eq_m256h(r, e);
21127 }
21128
21129 #[simd_test(enable = "avx512fp16")]
21130 unsafe fn test_mm512_fnmsub_ph() {
21131 let a = _mm512_set1_ph(1.0);
21132 let b = _mm512_set1_ph(2.0);
21133 let c = _mm512_set1_ph(3.0);
21134 let r = _mm512_fnmsub_ph(a, b, c);
21135 let e = _mm512_set1_ph(-5.0);
21136 assert_eq_m512h(r, e);
21137 }
21138
21139 #[simd_test(enable = "avx512fp16")]
21140 unsafe fn test_mm512_mask_fnmsub_ph() {
21141 let a = _mm512_set1_ph(1.0);
21142 let b = _mm512_set1_ph(2.0);
21143 let c = _mm512_set1_ph(3.0);
21144 let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
21145 let e = _mm512_set_ph(
21146 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21147 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21148 );
21149 assert_eq_m512h(r, e);
21150 }
21151
21152 #[simd_test(enable = "avx512fp16")]
21153 unsafe fn test_mm512_mask3_fnmsub_ph() {
21154 let a = _mm512_set1_ph(1.0);
21155 let b = _mm512_set1_ph(2.0);
21156 let c = _mm512_set1_ph(3.0);
21157 let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
21158 let e = _mm512_set_ph(
21159 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21160 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21161 );
21162 assert_eq_m512h(r, e);
21163 }
21164
21165 #[simd_test(enable = "avx512fp16")]
21166 unsafe fn test_mm512_maskz_fnmsub_ph() {
21167 let a = _mm512_set1_ph(1.0);
21168 let b = _mm512_set1_ph(2.0);
21169 let c = _mm512_set1_ph(3.0);
21170 let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
21171 let e = _mm512_set_ph(
21172 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21173 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21174 );
21175 assert_eq_m512h(r, e);
21176 }
21177
21178 #[simd_test(enable = "avx512fp16")]
21179 unsafe fn test_mm512_fnmsub_round_ph() {
21180 let a = _mm512_set1_ph(1.0);
21181 let b = _mm512_set1_ph(2.0);
21182 let c = _mm512_set1_ph(3.0);
21183 let r =
21184 _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21185 let e = _mm512_set1_ph(-5.0);
21186 assert_eq_m512h(r, e);
21187 }
21188
21189 #[simd_test(enable = "avx512fp16")]
21190 unsafe fn test_mm512_mask_fnmsub_round_ph() {
21191 let a = _mm512_set1_ph(1.0);
21192 let b = _mm512_set1_ph(2.0);
21193 let c = _mm512_set1_ph(3.0);
21194 let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21195 a,
21196 0b01010101010101010101010101010101,
21197 b,
21198 c,
21199 );
21200 let e = _mm512_set_ph(
21201 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21202 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
21203 );
21204 assert_eq_m512h(r, e);
21205 }
21206
21207 #[simd_test(enable = "avx512fp16")]
21208 unsafe fn test_mm512_mask3_fnmsub_round_ph() {
21209 let a = _mm512_set1_ph(1.0);
21210 let b = _mm512_set1_ph(2.0);
21211 let c = _mm512_set1_ph(3.0);
21212 let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21213 a,
21214 b,
21215 c,
21216 0b01010101010101010101010101010101,
21217 );
21218 let e = _mm512_set_ph(
21219 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21220 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
21221 );
21222 assert_eq_m512h(r, e);
21223 }
21224
21225 #[simd_test(enable = "avx512fp16")]
21226 unsafe fn test_mm512_maskz_fnmsub_round_ph() {
21227 let a = _mm512_set1_ph(1.0);
21228 let b = _mm512_set1_ph(2.0);
21229 let c = _mm512_set1_ph(3.0);
21230 let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21231 0b01010101010101010101010101010101,
21232 a,
21233 b,
21234 c,
21235 );
21236 let e = _mm512_set_ph(
21237 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21238 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
21239 );
21240 assert_eq_m512h(r, e);
21241 }
21242
21243 #[simd_test(enable = "avx512fp16")]
21244 unsafe fn test_mm_fnmsub_sh() {
21245 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21246 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21247 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21248 let r = _mm_fnmsub_sh(a, b, c);
21249 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21250 assert_eq_m128h(r, e);
21251 }
21252
21253 #[simd_test(enable = "avx512fp16")]
21254 unsafe fn test_mm_mask_fnmsub_sh() {
21255 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21256 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21257 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21258 let r = _mm_mask_fnmsub_sh(a, 0, b, c);
21259 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21260 assert_eq_m128h(r, e);
21261 let r = _mm_mask_fnmsub_sh(a, 1, b, c);
21262 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21263 assert_eq_m128h(r, e);
21264 }
21265
21266 #[simd_test(enable = "avx512fp16")]
21267 unsafe fn test_mm_mask3_fnmsub_sh() {
21268 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21269 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21270 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21271 let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
21272 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21273 assert_eq_m128h(r, e);
21274 let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
21275 let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
21276 assert_eq_m128h(r, e);
21277 }
21278
21279 #[simd_test(enable = "avx512fp16")]
21280 unsafe fn test_mm_maskz_fnmsub_sh() {
21281 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21282 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21283 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21284 let r = _mm_maskz_fnmsub_sh(0, a, b, c);
21285 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21286 assert_eq_m128h(r, e);
21287 let r = _mm_maskz_fnmsub_sh(1, a, b, c);
21288 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21289 assert_eq_m128h(r, e);
21290 }
21291
21292 #[simd_test(enable = "avx512fp16")]
21293 unsafe fn test_mm_fnmsub_round_sh() {
21294 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21295 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21296 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21297 let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21298 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21299 assert_eq_m128h(r, e);
21300 }
21301
21302 #[simd_test(enable = "avx512fp16")]
21303 unsafe fn test_mm_mask_fnmsub_round_sh() {
21304 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21305 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21306 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21307 let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21308 a, 0, b, c,
21309 );
21310 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21311 assert_eq_m128h(r, e);
21312 let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21313 a, 1, b, c,
21314 );
21315 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21316 assert_eq_m128h(r, e);
21317 }
21318
21319 #[simd_test(enable = "avx512fp16")]
21320 unsafe fn test_mm_mask3_fnmsub_round_sh() {
21321 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21322 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21323 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21324 let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21325 a, b, c, 0,
21326 );
21327 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21328 assert_eq_m128h(r, e);
21329 let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21330 a, b, c, 1,
21331 );
21332 let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
21333 assert_eq_m128h(r, e);
21334 }
21335
21336 #[simd_test(enable = "avx512fp16")]
21337 unsafe fn test_mm_maskz_fnmsub_round_sh() {
21338 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21339 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21340 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21341 let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21342 0, a, b, c,
21343 );
21344 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21345 assert_eq_m128h(r, e);
21346 let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21347 1, a, b, c,
21348 );
21349 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21350 assert_eq_m128h(r, e);
21351 }
21352
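    // fmaddsub alternates per lane: even-indexed lanes compute `a * b - c` (-1.0) and
    // odd-indexed lanes compute `a * b + c` (5.0). `_mm_set_ph` lists elements from
    // highest to lowest index, so the last argument is lane 0.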
21353 #[simd_test(enable = "avx512fp16,avx512vl")]
21354 unsafe fn test_mm_fmaddsub_ph() {
21355 let a = _mm_set1_ph(1.0);
21356 let b = _mm_set1_ph(2.0);
21357 let c = _mm_set1_ph(3.0);
21358 let r = _mm_fmaddsub_ph(a, b, c);
21359 let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
21360 assert_eq_m128h(r, e);
21361 }
21362
21363 #[simd_test(enable = "avx512fp16,avx512vl")]
21364 unsafe fn test_mm_mask_fmaddsub_ph() {
21365 let a = _mm_set1_ph(1.0);
21366 let b = _mm_set1_ph(2.0);
21367 let c = _mm_set1_ph(3.0);
21368 let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
21369 let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
21370 assert_eq_m128h(r, e);
21371 }
21372
21373 #[simd_test(enable = "avx512fp16,avx512vl")]
21374 unsafe fn test_mm_mask3_fmaddsub_ph() {
21375 let a = _mm_set1_ph(1.0);
21376 let b = _mm_set1_ph(2.0);
21377 let c = _mm_set1_ph(3.0);
21378 let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
21379 let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
21380 assert_eq_m128h(r, e);
21381 }
21382
21383 #[simd_test(enable = "avx512fp16,avx512vl")]
21384 unsafe fn test_mm_maskz_fmaddsub_ph() {
21385 let a = _mm_set1_ph(1.0);
21386 let b = _mm_set1_ph(2.0);
21387 let c = _mm_set1_ph(3.0);
21388 let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
21389 let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
21390 assert_eq_m128h(r, e);
21391 }
21392
21393 #[simd_test(enable = "avx512fp16,avx512vl")]
21394 unsafe fn test_mm256_fmaddsub_ph() {
21395 let a = _mm256_set1_ph(1.0);
21396 let b = _mm256_set1_ph(2.0);
21397 let c = _mm256_set1_ph(3.0);
21398 let r = _mm256_fmaddsub_ph(a, b, c);
21399 let e = _mm256_set_ph(
21400 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21401 );
21402 assert_eq_m256h(r, e);
21403 }
21404
21405 #[simd_test(enable = "avx512fp16,avx512vl")]
21406 unsafe fn test_mm256_mask_fmaddsub_ph() {
21407 let a = _mm256_set1_ph(1.0);
21408 let b = _mm256_set1_ph(2.0);
21409 let c = _mm256_set1_ph(3.0);
21410 let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
21411 let e = _mm256_set_ph(
21412 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21413 );
21414 assert_eq_m256h(r, e);
21415 }
21416
21417 #[simd_test(enable = "avx512fp16,avx512vl")]
21418 unsafe fn test_mm256_mask3_fmaddsub_ph() {
21419 let a = _mm256_set1_ph(1.0);
21420 let b = _mm256_set1_ph(2.0);
21421 let c = _mm256_set1_ph(3.0);
21422 let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
21423 let e = _mm256_set_ph(
21424 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21425 );
21426 assert_eq_m256h(r, e);
21427 }
21428
21429 #[simd_test(enable = "avx512fp16,avx512vl")]
21430 unsafe fn test_mm256_maskz_fmaddsub_ph() {
21431 let a = _mm256_set1_ph(1.0);
21432 let b = _mm256_set1_ph(2.0);
21433 let c = _mm256_set1_ph(3.0);
21434 let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
21435 let e = _mm256_set_ph(
21436 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21437 );
21438 assert_eq_m256h(r, e);
21439 }
21440
21441 #[simd_test(enable = "avx512fp16")]
21442 unsafe fn test_mm512_fmaddsub_ph() {
21443 let a = _mm512_set1_ph(1.0);
21444 let b = _mm512_set1_ph(2.0);
21445 let c = _mm512_set1_ph(3.0);
21446 let r = _mm512_fmaddsub_ph(a, b, c);
21447 let e = _mm512_set_ph(
21448 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21449 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21450 );
21451 assert_eq_m512h(r, e);
21452 }
21453
21454 #[simd_test(enable = "avx512fp16")]
21455 unsafe fn test_mm512_mask_fmaddsub_ph() {
21456 let a = _mm512_set1_ph(1.0);
21457 let b = _mm512_set1_ph(2.0);
21458 let c = _mm512_set1_ph(3.0);
21459 let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
21460 let e = _mm512_set_ph(
21461 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21462 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21463 );
21464 assert_eq_m512h(r, e);
21465 }
21466
21467 #[simd_test(enable = "avx512fp16")]
21468 unsafe fn test_mm512_mask3_fmaddsub_ph() {
21469 let a = _mm512_set1_ph(1.0);
21470 let b = _mm512_set1_ph(2.0);
21471 let c = _mm512_set1_ph(3.0);
21472 let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
21473 let e = _mm512_set_ph(
21474 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21475 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21476 );
21477 assert_eq_m512h(r, e);
21478 }
21479
21480 #[simd_test(enable = "avx512fp16")]
21481 unsafe fn test_mm512_maskz_fmaddsub_ph() {
21482 let a = _mm512_set1_ph(1.0);
21483 let b = _mm512_set1_ph(2.0);
21484 let c = _mm512_set1_ph(3.0);
21485 let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
21486 let e = _mm512_set_ph(
21487 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21488 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21489 );
21490 assert_eq_m512h(r, e);
21491 }
21492
21493 #[simd_test(enable = "avx512fp16")]
21494 unsafe fn test_mm512_fmaddsub_round_ph() {
21495 let a = _mm512_set1_ph(1.0);
21496 let b = _mm512_set1_ph(2.0);
21497 let c = _mm512_set1_ph(3.0);
21498 let r =
21499 _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21500 let e = _mm512_set_ph(
21501 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21502 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
21503 );
21504 assert_eq_m512h(r, e);
21505 }
21506
21507 #[simd_test(enable = "avx512fp16")]
21508 unsafe fn test_mm512_mask_fmaddsub_round_ph() {
21509 let a = _mm512_set1_ph(1.0);
21510 let b = _mm512_set1_ph(2.0);
21511 let c = _mm512_set1_ph(3.0);
21512 let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21513 a,
21514 0b00110011001100110011001100110011,
21515 b,
21516 c,
21517 );
21518 let e = _mm512_set_ph(
21519 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21520 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
21521 );
21522 assert_eq_m512h(r, e);
21523 }
21524
21525 #[simd_test(enable = "avx512fp16")]
21526 unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
21527 let a = _mm512_set1_ph(1.0);
21528 let b = _mm512_set1_ph(2.0);
21529 let c = _mm512_set1_ph(3.0);
21530 let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21531 a,
21532 b,
21533 c,
21534 0b00110011001100110011001100110011,
21535 );
21536 let e = _mm512_set_ph(
21537 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21538 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
21539 );
21540 assert_eq_m512h(r, e);
21541 }
21542
21543 #[simd_test(enable = "avx512fp16")]
21544 unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
21545 let a = _mm512_set1_ph(1.0);
21546 let b = _mm512_set1_ph(2.0);
21547 let c = _mm512_set1_ph(3.0);
21548 let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21549 0b00110011001100110011001100110011,
21550 a,
21551 b,
21552 c,
21553 );
21554 let e = _mm512_set_ph(
21555 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21556 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
21557 );
21558 assert_eq_m512h(r, e);
21559 }
21560
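    // fmsubadd is the opposite alternation: even-indexed lanes compute `a * b + c`
    // (5.0) and odd-indexed lanes compute `a * b - c` (-1.0).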
21561 #[simd_test(enable = "avx512fp16,avx512vl")]
21562 unsafe fn test_mm_fmsubadd_ph() {
21563 let a = _mm_set1_ph(1.0);
21564 let b = _mm_set1_ph(2.0);
21565 let c = _mm_set1_ph(3.0);
21566 let r = _mm_fmsubadd_ph(a, b, c);
21567 let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
21568 assert_eq_m128h(r, e);
21569 }
21570
21571 #[simd_test(enable = "avx512fp16,avx512vl")]
21572 unsafe fn test_mm_mask_fmsubadd_ph() {
21573 let a = _mm_set1_ph(1.0);
21574 let b = _mm_set1_ph(2.0);
21575 let c = _mm_set1_ph(3.0);
21576 let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
21577 let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
21578 assert_eq_m128h(r, e);
21579 }
21580
21581 #[simd_test(enable = "avx512fp16,avx512vl")]
21582 unsafe fn test_mm_mask3_fmsubadd_ph() {
21583 let a = _mm_set1_ph(1.0);
21584 let b = _mm_set1_ph(2.0);
21585 let c = _mm_set1_ph(3.0);
21586 let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
21587 let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
21588 assert_eq_m128h(r, e);
21589 }
21590
21591 #[simd_test(enable = "avx512fp16,avx512vl")]
21592 unsafe fn test_mm_maskz_fmsubadd_ph() {
21593 let a = _mm_set1_ph(1.0);
21594 let b = _mm_set1_ph(2.0);
21595 let c = _mm_set1_ph(3.0);
21596 let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
21597 let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
21598 assert_eq_m128h(r, e);
21599 }
21600
21601 #[simd_test(enable = "avx512fp16,avx512vl")]
21602 unsafe fn test_mm256_fmsubadd_ph() {
21603 let a = _mm256_set1_ph(1.0);
21604 let b = _mm256_set1_ph(2.0);
21605 let c = _mm256_set1_ph(3.0);
21606 let r = _mm256_fmsubadd_ph(a, b, c);
21607 let e = _mm256_set_ph(
21608 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21609 );
21610 assert_eq_m256h(r, e);
21611 }
21612
21613 #[simd_test(enable = "avx512fp16,avx512vl")]
21614 unsafe fn test_mm256_mask_fmsubadd_ph() {
21615 let a = _mm256_set1_ph(1.0);
21616 let b = _mm256_set1_ph(2.0);
21617 let c = _mm256_set1_ph(3.0);
21618 let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
21619 let e = _mm256_set_ph(
21620 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21621 );
21622 assert_eq_m256h(r, e);
21623 }
21624
21625 #[simd_test(enable = "avx512fp16,avx512vl")]
21626 unsafe fn test_mm256_mask3_fmsubadd_ph() {
21627 let a = _mm256_set1_ph(1.0);
21628 let b = _mm256_set1_ph(2.0);
21629 let c = _mm256_set1_ph(3.0);
21630 let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
21631 let e = _mm256_set_ph(
21632 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21633 );
21634 assert_eq_m256h(r, e);
21635 }
21636
21637 #[simd_test(enable = "avx512fp16,avx512vl")]
21638 unsafe fn test_mm256_maskz_fmsubadd_ph() {
21639 let a = _mm256_set1_ph(1.0);
21640 let b = _mm256_set1_ph(2.0);
21641 let c = _mm256_set1_ph(3.0);
21642 let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
21643 let e = _mm256_set_ph(
21644 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21645 );
21646 assert_eq_m256h(r, e);
21647 }
21648
21649 #[simd_test(enable = "avx512fp16")]
21650 unsafe fn test_mm512_fmsubadd_ph() {
21651 let a = _mm512_set1_ph(1.0);
21652 let b = _mm512_set1_ph(2.0);
21653 let c = _mm512_set1_ph(3.0);
21654 let r = _mm512_fmsubadd_ph(a, b, c);
21655 let e = _mm512_set_ph(
21656 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21657 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21658 );
21659 assert_eq_m512h(r, e);
21660 }
21661
21662 #[simd_test(enable = "avx512fp16")]
21663 unsafe fn test_mm512_mask_fmsubadd_ph() {
21664 let a = _mm512_set1_ph(1.0);
21665 let b = _mm512_set1_ph(2.0);
21666 let c = _mm512_set1_ph(3.0);
21667 let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
21668 let e = _mm512_set_ph(
21669 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21670 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21671 );
21672 assert_eq_m512h(r, e);
21673 }
21674
21675 #[simd_test(enable = "avx512fp16")]
21676 unsafe fn test_mm512_mask3_fmsubadd_ph() {
21677 let a = _mm512_set1_ph(1.0);
21678 let b = _mm512_set1_ph(2.0);
21679 let c = _mm512_set1_ph(3.0);
21680 let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
21681 let e = _mm512_set_ph(
21682 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21683 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21684 );
21685 assert_eq_m512h(r, e);
21686 }
21687
21688 #[simd_test(enable = "avx512fp16")]
21689 unsafe fn test_mm512_maskz_fmsubadd_ph() {
21690 let a = _mm512_set1_ph(1.0);
21691 let b = _mm512_set1_ph(2.0);
21692 let c = _mm512_set1_ph(3.0);
21693 let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
21694 let e = _mm512_set_ph(
21695 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21696 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21697 );
21698 assert_eq_m512h(r, e);
21699 }
21700
21701 #[simd_test(enable = "avx512fp16")]
21702 unsafe fn test_mm512_fmsubadd_round_ph() {
21703 let a = _mm512_set1_ph(1.0);
21704 let b = _mm512_set1_ph(2.0);
21705 let c = _mm512_set1_ph(3.0);
21706 let r =
21707 _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21708 let e = _mm512_set_ph(
21709 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21710 -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
21711 );
21712 assert_eq_m512h(r, e);
21713 }
21714
21715 #[simd_test(enable = "avx512fp16")]
21716 unsafe fn test_mm512_mask_fmsubadd_round_ph() {
21717 let a = _mm512_set1_ph(1.0);
21718 let b = _mm512_set1_ph(2.0);
21719 let c = _mm512_set1_ph(3.0);
21720 let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21721 a,
21722 0b00110011001100110011001100110011,
21723 b,
21724 c,
21725 );
21726 let e = _mm512_set_ph(
21727 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21728 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
21729 );
21730 assert_eq_m512h(r, e);
21731 }
21732
21733 #[simd_test(enable = "avx512fp16")]
21734 unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
21735 let a = _mm512_set1_ph(1.0);
21736 let b = _mm512_set1_ph(2.0);
21737 let c = _mm512_set1_ph(3.0);
21738 let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21739 a,
21740 b,
21741 c,
21742 0b00110011001100110011001100110011,
21743 );
21744 let e = _mm512_set_ph(
21745 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21746 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
21747 );
21748 assert_eq_m512h(r, e);
21749 }
21750
21751 #[simd_test(enable = "avx512fp16")]
21752 unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
21753 let a = _mm512_set1_ph(1.0);
21754 let b = _mm512_set1_ph(2.0);
21755 let c = _mm512_set1_ph(3.0);
21756 let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21757 0b00110011001100110011001100110011,
21758 a,
21759 b,
21760 c,
21761 );
21762 let e = _mm512_set_ph(
21763 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21764 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
21765 );
21766 assert_eq_m512h(r, e);
21767 }
21768
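    // Reciprocal (`rcp`) tests: the input is 2.0 and the tests rely on the
    // approximate reciprocal being exactly 0.5 here, so exact comparison is safe.
    // The scalar `_sh` variants compute only the lowest element from `b` and copy
    // the upper seven elements of the result from `a`.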
21769 #[simd_test(enable = "avx512fp16,avx512vl")]
21770 unsafe fn test_mm_rcp_ph() {
21771 let a = _mm_set1_ph(2.0);
21772 let r = _mm_rcp_ph(a);
21773 let e = _mm_set1_ph(0.5);
21774 assert_eq_m128h(r, e);
21775 }
21776
21777 #[simd_test(enable = "avx512fp16,avx512vl")]
21778 unsafe fn test_mm_mask_rcp_ph() {
21779 let a = _mm_set1_ph(2.0);
21780 let src = _mm_set1_ph(1.0);
21781 let r = _mm_mask_rcp_ph(src, 0b01010101, a);
21782 let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
21783 assert_eq_m128h(r, e);
21784 }
21785
21786 #[simd_test(enable = "avx512fp16,avx512vl")]
21787 unsafe fn test_mm_maskz_rcp_ph() {
21788 let a = _mm_set1_ph(2.0);
21789 let r = _mm_maskz_rcp_ph(0b01010101, a);
21790 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
21791 assert_eq_m128h(r, e);
21792 }
21793
21794 #[simd_test(enable = "avx512fp16,avx512vl")]
21795 unsafe fn test_mm256_rcp_ph() {
21796 let a = _mm256_set1_ph(2.0);
21797 let r = _mm256_rcp_ph(a);
21798 let e = _mm256_set1_ph(0.5);
21799 assert_eq_m256h(r, e);
21800 }
21801
21802 #[simd_test(enable = "avx512fp16,avx512vl")]
21803 unsafe fn test_mm256_mask_rcp_ph() {
21804 let a = _mm256_set1_ph(2.0);
21805 let src = _mm256_set1_ph(1.0);
21806 let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
21807 let e = _mm256_set_ph(
21808 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21809 );
21810 assert_eq_m256h(r, e);
21811 }
21812
21813 #[simd_test(enable = "avx512fp16,avx512vl")]
21814 unsafe fn test_mm256_maskz_rcp_ph() {
21815 let a = _mm256_set1_ph(2.0);
21816 let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
21817 let e = _mm256_set_ph(
21818 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21819 );
21820 assert_eq_m256h(r, e);
21821 }
21822
21823 #[simd_test(enable = "avx512fp16")]
21824 unsafe fn test_mm512_rcp_ph() {
21825 let a = _mm512_set1_ph(2.0);
21826 let r = _mm512_rcp_ph(a);
21827 let e = _mm512_set1_ph(0.5);
21828 assert_eq_m512h(r, e);
21829 }
21830
21831 #[simd_test(enable = "avx512fp16")]
21832 unsafe fn test_mm512_mask_rcp_ph() {
21833 let a = _mm512_set1_ph(2.0);
21834 let src = _mm512_set1_ph(1.0);
21835 let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
21836 let e = _mm512_set_ph(
21837 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
21838 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21839 );
21840 assert_eq_m512h(r, e);
21841 }
21842
21843 #[simd_test(enable = "avx512fp16")]
21844 unsafe fn test_mm512_maskz_rcp_ph() {
21845 let a = _mm512_set1_ph(2.0);
21846 let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
21847 let e = _mm512_set_ph(
21848 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
21849 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21850 );
21851 assert_eq_m512h(r, e);
21852 }
21853
21854 #[simd_test(enable = "avx512fp16")]
21855 unsafe fn test_mm_rcp_sh() {
21856 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21857 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21858 let r = _mm_rcp_sh(a, b);
21859 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21860 assert_eq_m128h(r, e);
21861 }
21862
21863 #[simd_test(enable = "avx512fp16")]
21864 unsafe fn test_mm_mask_rcp_sh() {
21865 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21866 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21867 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
21868 let r = _mm_mask_rcp_sh(src, 0, a, b);
21869 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21870 assert_eq_m128h(r, e);
21871 let r = _mm_mask_rcp_sh(src, 1, a, b);
21872 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21873 assert_eq_m128h(r, e);
21874 }
21875
21876 #[simd_test(enable = "avx512fp16")]
21877 unsafe fn test_mm_maskz_rcp_sh() {
21878 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21879 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21880 let r = _mm_maskz_rcp_sh(0, a, b);
21881 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21882 assert_eq_m128h(r, e);
21883 let r = _mm_maskz_rcp_sh(1, a, b);
21884 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21885 assert_eq_m128h(r, e);
21886 }
21887
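    // Reciprocal square root (`rsqrt`) tests: 1 / sqrt(4.0) = 0.5, which the
    // approximation is expected to produce exactly for this input.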
21888 #[simd_test(enable = "avx512fp16,avx512vl")]
21889 unsafe fn test_mm_rsqrt_ph() {
21890 let a = _mm_set1_ph(4.0);
21891 let r = _mm_rsqrt_ph(a);
21892 let e = _mm_set1_ph(0.5);
21893 assert_eq_m128h(r, e);
21894 }
21895
21896 #[simd_test(enable = "avx512fp16,avx512vl")]
21897 unsafe fn test_mm_mask_rsqrt_ph() {
21898 let a = _mm_set1_ph(4.0);
21899 let src = _mm_set1_ph(1.0);
21900 let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
21901 let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
21902 assert_eq_m128h(r, e);
21903 }
21904
21905 #[simd_test(enable = "avx512fp16,avx512vl")]
21906 unsafe fn test_mm_maskz_rsqrt_ph() {
21907 let a = _mm_set1_ph(4.0);
21908 let r = _mm_maskz_rsqrt_ph(0b01010101, a);
21909 let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
21910 assert_eq_m128h(r, e);
21911 }
21912
21913 #[simd_test(enable = "avx512fp16,avx512vl")]
21914 unsafe fn test_mm256_rsqrt_ph() {
21915 let a = _mm256_set1_ph(4.0);
21916 let r = _mm256_rsqrt_ph(a);
21917 let e = _mm256_set1_ph(0.5);
21918 assert_eq_m256h(r, e);
21919 }
21920
21921 #[simd_test(enable = "avx512fp16,avx512vl")]
21922 unsafe fn test_mm256_mask_rsqrt_ph() {
21923 let a = _mm256_set1_ph(4.0);
21924 let src = _mm256_set1_ph(1.0);
21925 let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
21926 let e = _mm256_set_ph(
21927 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21928 );
21929 assert_eq_m256h(r, e);
21930 }
21931
21932 #[simd_test(enable = "avx512fp16,avx512vl")]
21933 unsafe fn test_mm256_maskz_rsqrt_ph() {
21934 let a = _mm256_set1_ph(4.0);
21935 let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
21936 let e = _mm256_set_ph(
21937 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21938 );
21939 assert_eq_m256h(r, e);
21940 }
21941
21942 #[simd_test(enable = "avx512fp16")]
21943 unsafe fn test_mm512_rsqrt_ph() {
21944 let a = _mm512_set1_ph(4.0);
21945 let r = _mm512_rsqrt_ph(a);
21946 let e = _mm512_set1_ph(0.5);
21947 assert_eq_m512h(r, e);
21948 }
21949
21950 #[simd_test(enable = "avx512fp16")]
21951 unsafe fn test_mm512_mask_rsqrt_ph() {
21952 let a = _mm512_set1_ph(4.0);
21953 let src = _mm512_set1_ph(1.0);
21954 let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
21955 let e = _mm512_set_ph(
21956 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
21957 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
21958 );
21959 assert_eq_m512h(r, e);
21960 }
21961
21962 #[simd_test(enable = "avx512fp16")]
21963 unsafe fn test_mm512_maskz_rsqrt_ph() {
21964 let a = _mm512_set1_ph(4.0);
21965 let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
21966 let e = _mm512_set_ph(
21967 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
21968 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
21969 );
21970 assert_eq_m512h(r, e);
21971 }
21972
21973 #[simd_test(enable = "avx512fp16")]
21974 unsafe fn test_mm_rsqrt_sh() {
21975 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21976 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
21977 let r = _mm_rsqrt_sh(a, b);
21978 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21979 assert_eq_m128h(r, e);
21980 }
21981
21982 #[simd_test(enable = "avx512fp16")]
21983 unsafe fn test_mm_mask_rsqrt_sh() {
21984 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21985 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
21986 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
21987 let r = _mm_mask_rsqrt_sh(src, 0, a, b);
21988 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21989 assert_eq_m128h(r, e);
21990 let r = _mm_mask_rsqrt_sh(src, 1, a, b);
21991 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21992 assert_eq_m128h(r, e);
21993 }
21994
21995 #[simd_test(enable = "avx512fp16")]
21996 unsafe fn test_mm_maskz_rsqrt_sh() {
21997 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21998 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
21999 let r = _mm_maskz_rsqrt_sh(0, a, b);
22000 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22001 assert_eq_m128h(r, e);
22002 let r = _mm_maskz_rsqrt_sh(1, a, b);
22003 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22004 assert_eq_m128h(r, e);
22005 }
22006
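    // Square root tests: sqrt(4.0) = 2.0 exactly, so no rounding concerns; the
    // `_round_` variants simply pass an explicit rounding mode with exceptions
    // suppressed.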
22007 #[simd_test(enable = "avx512fp16,avx512vl")]
22008 unsafe fn test_mm_sqrt_ph() {
22009 let a = _mm_set1_ph(4.0);
22010 let r = _mm_sqrt_ph(a);
22011 let e = _mm_set1_ph(2.0);
22012 assert_eq_m128h(r, e);
22013 }
22014
22015 #[simd_test(enable = "avx512fp16,avx512vl")]
22016 unsafe fn test_mm_mask_sqrt_ph() {
22017 let a = _mm_set1_ph(4.0);
22018 let src = _mm_set1_ph(1.0);
22019 let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
22020 let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
22021 assert_eq_m128h(r, e);
22022 }
22023
22024 #[simd_test(enable = "avx512fp16,avx512vl")]
22025 unsafe fn test_mm_maskz_sqrt_ph() {
22026 let a = _mm_set1_ph(4.0);
22027 let r = _mm_maskz_sqrt_ph(0b01010101, a);
22028 let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
22029 assert_eq_m128h(r, e);
22030 }
22031
22032 #[simd_test(enable = "avx512fp16,avx512vl")]
22033 unsafe fn test_mm256_sqrt_ph() {
22034 let a = _mm256_set1_ph(4.0);
22035 let r = _mm256_sqrt_ph(a);
22036 let e = _mm256_set1_ph(2.0);
22037 assert_eq_m256h(r, e);
22038 }
22039
22040 #[simd_test(enable = "avx512fp16,avx512vl")]
22041 unsafe fn test_mm256_mask_sqrt_ph() {
22042 let a = _mm256_set1_ph(4.0);
22043 let src = _mm256_set1_ph(1.0);
22044 let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
22045 let e = _mm256_set_ph(
22046 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22047 );
22048 assert_eq_m256h(r, e);
22049 }
22050
22051 #[simd_test(enable = "avx512fp16,avx512vl")]
22052 unsafe fn test_mm256_maskz_sqrt_ph() {
22053 let a = _mm256_set1_ph(4.0);
22054 let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
22055 let e = _mm256_set_ph(
22056 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22057 );
22058 assert_eq_m256h(r, e);
22059 }
22060
22061 #[simd_test(enable = "avx512fp16")]
22062 unsafe fn test_mm512_sqrt_ph() {
22063 let a = _mm512_set1_ph(4.0);
22064 let r = _mm512_sqrt_ph(a);
22065 let e = _mm512_set1_ph(2.0);
22066 assert_eq_m512h(r, e);
22067 }
22068
22069 #[simd_test(enable = "avx512fp16")]
22070 unsafe fn test_mm512_mask_sqrt_ph() {
22071 let a = _mm512_set1_ph(4.0);
22072 let src = _mm512_set1_ph(1.0);
22073 let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
22074 let e = _mm512_set_ph(
22075 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
22076 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22077 );
22078 assert_eq_m512h(r, e);
22079 }
22080
22081 #[simd_test(enable = "avx512fp16")]
22082 unsafe fn test_mm512_maskz_sqrt_ph() {
22083 let a = _mm512_set1_ph(4.0);
22084 let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
22085 let e = _mm512_set_ph(
22086 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22087 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22088 );
22089 assert_eq_m512h(r, e);
22090 }
22091
22092 #[simd_test(enable = "avx512fp16")]
22093 unsafe fn test_mm512_sqrt_round_ph() {
22094 let a = _mm512_set1_ph(4.0);
22095 let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
22096 let e = _mm512_set1_ph(2.0);
22097 assert_eq_m512h(r, e);
22098 }
22099
22100 #[simd_test(enable = "avx512fp16")]
22101 unsafe fn test_mm512_mask_sqrt_round_ph() {
22102 let a = _mm512_set1_ph(4.0);
22103 let src = _mm512_set1_ph(1.0);
22104 let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22105 src,
22106 0b01010101010101010101010101010101,
22107 a,
22108 );
22109 let e = _mm512_set_ph(
22110 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
22111 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
22112 );
22113 assert_eq_m512h(r, e);
22114 }
22115
22116 #[simd_test(enable = "avx512fp16")]
22117 unsafe fn test_mm512_maskz_sqrt_round_ph() {
22118 let a = _mm512_set1_ph(4.0);
22119 let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22120 0b01010101010101010101010101010101,
22121 a,
22122 );
22123 let e = _mm512_set_ph(
22124 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22125 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22126 );
22127 assert_eq_m512h(r, e);
22128 }
22129
22130 #[simd_test(enable = "avx512fp16")]
22131 unsafe fn test_mm_sqrt_sh() {
22132 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22133 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22134 let r = _mm_sqrt_sh(a, b);
22135 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22136 assert_eq_m128h(r, e);
22137 }
22138
22139 #[simd_test(enable = "avx512fp16")]
22140 unsafe fn test_mm_mask_sqrt_sh() {
22141 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22142 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22143 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22144 let r = _mm_mask_sqrt_sh(src, 0, a, b);
22145 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22146 assert_eq_m128h(r, e);
22147 let r = _mm_mask_sqrt_sh(src, 1, a, b);
22148 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22149 assert_eq_m128h(r, e);
22150 }
22151
22152 #[simd_test(enable = "avx512fp16")]
22153 unsafe fn test_mm_maskz_sqrt_sh() {
22154 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22155 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22156 let r = _mm_maskz_sqrt_sh(0, a, b);
22157 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22158 assert_eq_m128h(r, e);
22159 let r = _mm_maskz_sqrt_sh(1, a, b);
22160 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22161 assert_eq_m128h(r, e);
22162 }
22163
22164 #[simd_test(enable = "avx512fp16")]
22165 unsafe fn test_mm_sqrt_round_sh() {
22166 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22167 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22168 let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22169 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22170 assert_eq_m128h(r, e);
22171 }
22172
22173 #[simd_test(enable = "avx512fp16")]
22174 unsafe fn test_mm_mask_sqrt_round_sh() {
22175 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22176 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22177 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22178 let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22179 src, 0, a, b,
22180 );
22181 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22182 assert_eq_m128h(r, e);
22183 let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22184 src, 1, a, b,
22185 );
22186 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22187 assert_eq_m128h(r, e);
22188 }
22189
22190 #[simd_test(enable = "avx512fp16")]
22191 unsafe fn test_mm_maskz_sqrt_round_sh() {
22192 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22193 let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
22194 let r =
22195 _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22196 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22197 assert_eq_m128h(r, e);
22198 let r =
22199 _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22200 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22201 assert_eq_m128h(r, e);
22202 }
22203
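    // Maximum tests: max(2.0, 1.0) = 2.0 in every lane; masked-off lanes keep
    // `src` (or are zeroed in the `maskz` variants).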
22204 #[simd_test(enable = "avx512fp16,avx512vl")]
22205 unsafe fn test_mm_max_ph() {
22206 let a = _mm_set1_ph(2.0);
22207 let b = _mm_set1_ph(1.0);
22208 let r = _mm_max_ph(a, b);
22209 let e = _mm_set1_ph(2.0);
22210 assert_eq_m128h(r, e);
22211 }
22212
22213 #[simd_test(enable = "avx512fp16,avx512vl")]
22214 unsafe fn test_mm_mask_max_ph() {
22215 let a = _mm_set1_ph(2.0);
22216 let b = _mm_set1_ph(1.0);
22217 let src = _mm_set1_ph(3.0);
22218 let r = _mm_mask_max_ph(src, 0b01010101, a, b);
22219 let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
22220 assert_eq_m128h(r, e);
22221 }
22222
22223 #[simd_test(enable = "avx512fp16,avx512vl")]
22224 unsafe fn test_mm_maskz_max_ph() {
22225 let a = _mm_set1_ph(2.0);
22226 let b = _mm_set1_ph(1.0);
22227 let r = _mm_maskz_max_ph(0b01010101, a, b);
22228 let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
22229 assert_eq_m128h(r, e);
22230 }
22231
22232 #[simd_test(enable = "avx512fp16,avx512vl")]
22233 unsafe fn test_mm256_max_ph() {
22234 let a = _mm256_set1_ph(2.0);
22235 let b = _mm256_set1_ph(1.0);
22236 let r = _mm256_max_ph(a, b);
22237 let e = _mm256_set1_ph(2.0);
22238 assert_eq_m256h(r, e);
22239 }
22240
22241 #[simd_test(enable = "avx512fp16,avx512vl")]
22242 unsafe fn test_mm256_mask_max_ph() {
22243 let a = _mm256_set1_ph(2.0);
22244 let b = _mm256_set1_ph(1.0);
22245 let src = _mm256_set1_ph(3.0);
22246 let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
22247 let e = _mm256_set_ph(
22248 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22249 );
22250 assert_eq_m256h(r, e);
22251 }
22252
22253 #[simd_test(enable = "avx512fp16,avx512vl")]
22254 unsafe fn test_mm256_maskz_max_ph() {
22255 let a = _mm256_set1_ph(2.0);
22256 let b = _mm256_set1_ph(1.0);
22257 let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
22258 let e = _mm256_set_ph(
22259 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22260 );
22261 assert_eq_m256h(r, e);
22262 }
22263
22264 #[simd_test(enable = "avx512fp16")]
22265 unsafe fn test_mm512_max_ph() {
22266 let a = _mm512_set1_ph(2.0);
22267 let b = _mm512_set1_ph(1.0);
22268 let r = _mm512_max_ph(a, b);
22269 let e = _mm512_set1_ph(2.0);
22270 assert_eq_m512h(r, e);
22271 }
22272
22273 #[simd_test(enable = "avx512fp16")]
22274 unsafe fn test_mm512_mask_max_ph() {
22275 let a = _mm512_set1_ph(2.0);
22276 let b = _mm512_set1_ph(1.0);
22277 let src = _mm512_set1_ph(3.0);
22278 let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
22279 let e = _mm512_set_ph(
22280 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
22281 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22282 );
22283 assert_eq_m512h(r, e);
22284 }
22285
22286 #[simd_test(enable = "avx512fp16")]
22287 unsafe fn test_mm512_maskz_max_ph() {
22288 let a = _mm512_set1_ph(2.0);
22289 let b = _mm512_set1_ph(1.0);
22290 let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
22291 let e = _mm512_set_ph(
22292 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22293 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22294 );
22295 assert_eq_m512h(r, e);
22296 }
22297
22298 #[simd_test(enable = "avx512fp16")]
22299 unsafe fn test_mm512_max_round_ph() {
22300 let a = _mm512_set1_ph(2.0);
22301 let b = _mm512_set1_ph(1.0);
22302 let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22303 let e = _mm512_set1_ph(2.0);
22304 assert_eq_m512h(r, e);
22305 }
22306
22307 #[simd_test(enable = "avx512fp16")]
22308 unsafe fn test_mm512_mask_max_round_ph() {
22309 let a = _mm512_set1_ph(2.0);
22310 let b = _mm512_set1_ph(1.0);
22311 let src = _mm512_set1_ph(3.0);
22312 let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22313 src,
22314 0b01010101010101010101010101010101,
22315 a,
22316 b,
22317 );
22318 let e = _mm512_set_ph(
22319 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
22320 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
22321 );
22322 assert_eq_m512h(r, e);
22323 }
22324
22325 #[simd_test(enable = "avx512fp16")]
22326 unsafe fn test_mm512_maskz_max_round_ph() {
22327 let a = _mm512_set1_ph(2.0);
22328 let b = _mm512_set1_ph(1.0);
22329 let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22330 0b01010101010101010101010101010101,
22331 a,
22332 b,
22333 );
22334 let e = _mm512_set_ph(
22335 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
22336 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
22337 );
22338 assert_eq_m512h(r, e);
22339 }
22340
22341 #[simd_test(enable = "avx512fp16")]
22342 unsafe fn test_mm_max_sh() {
22343 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22344 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22345 let r = _mm_max_sh(a, b);
22346 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22347 assert_eq_m128h(r, e);
22348 }
22349
22350 #[simd_test(enable = "avx512fp16")]
22351 unsafe fn test_mm_mask_max_sh() {
22352 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22353 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22354 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22355 let r = _mm_mask_max_sh(src, 0, a, b);
22356 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22357 assert_eq_m128h(r, e);
22358 let r = _mm_mask_max_sh(src, 1, a, b);
22359 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22360 assert_eq_m128h(r, e);
22361 }
22362
22363 #[simd_test(enable = "avx512fp16")]
22364 unsafe fn test_mm_maskz_max_sh() {
22365 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22366 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22367 let r = _mm_maskz_max_sh(0, a, b);
22368 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22369 assert_eq_m128h(r, e);
22370 let r = _mm_maskz_max_sh(1, a, b);
22371 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22372 assert_eq_m128h(r, e);
22373 }
22374
22375 #[simd_test(enable = "avx512fp16")]
22376 unsafe fn test_mm_max_round_sh() {
22377 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22378 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22379 let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22380 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22381 assert_eq_m128h(r, e);
22382 }
22383
22384 #[simd_test(enable = "avx512fp16")]
22385 unsafe fn test_mm_mask_max_round_sh() {
22386 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22387 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22388 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22389 let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22390 src, 0, a, b,
22391 );
22392 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22393 assert_eq_m128h(r, e);
22394 let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22395 src, 1, a, b,
22396 );
22397 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22398 assert_eq_m128h(r, e);
22399 }
22400
22401 #[simd_test(enable = "avx512fp16")]
22402 unsafe fn test_mm_maskz_max_round_sh() {
22403 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22404 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22405 let r =
22406 _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22407 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22408 assert_eq_m128h(r, e);
22409 let r =
22410 _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22411 let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22412 assert_eq_m128h(r, e);
22413 }
22414
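    // Minimum tests: min(2.0, 1.0) = 1.0 in every lane.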
22415 #[simd_test(enable = "avx512fp16,avx512vl")]
22416 unsafe fn test_mm_min_ph() {
22417 let a = _mm_set1_ph(2.0);
22418 let b = _mm_set1_ph(1.0);
22419 let r = _mm_min_ph(a, b);
22420 let e = _mm_set1_ph(1.0);
22421 assert_eq_m128h(r, e);
22422 }
22423
22424 #[simd_test(enable = "avx512fp16,avx512vl")]
22425 unsafe fn test_mm_mask_min_ph() {
22426 let a = _mm_set1_ph(2.0);
22427 let b = _mm_set1_ph(1.0);
22428 let src = _mm_set1_ph(3.0);
22429 let r = _mm_mask_min_ph(src, 0b01010101, a, b);
22430 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
22431 assert_eq_m128h(r, e);
22432 }
22433
22434 #[simd_test(enable = "avx512fp16,avx512vl")]
22435 unsafe fn test_mm_maskz_min_ph() {
22436 let a = _mm_set1_ph(2.0);
22437 let b = _mm_set1_ph(1.0);
22438 let r = _mm_maskz_min_ph(0b01010101, a, b);
22439 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
22440 assert_eq_m128h(r, e);
22441 }
22442
22443 #[simd_test(enable = "avx512fp16,avx512vl")]
22444 unsafe fn test_mm256_min_ph() {
22445 let a = _mm256_set1_ph(2.0);
22446 let b = _mm256_set1_ph(1.0);
22447 let r = _mm256_min_ph(a, b);
22448 let e = _mm256_set1_ph(1.0);
22449 assert_eq_m256h(r, e);
22450 }
22451
22452 #[simd_test(enable = "avx512fp16,avx512vl")]
22453 unsafe fn test_mm256_mask_min_ph() {
22454 let a = _mm256_set1_ph(2.0);
22455 let b = _mm256_set1_ph(1.0);
22456 let src = _mm256_set1_ph(3.0);
22457 let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
22458 let e = _mm256_set_ph(
22459 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22460 );
22461 assert_eq_m256h(r, e);
22462 }
22463
22464 #[simd_test(enable = "avx512fp16,avx512vl")]
22465 unsafe fn test_mm256_maskz_min_ph() {
22466 let a = _mm256_set1_ph(2.0);
22467 let b = _mm256_set1_ph(1.0);
22468 let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
22469 let e = _mm256_set_ph(
22470 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22471 );
22472 assert_eq_m256h(r, e);
22473 }
22474
22475 #[simd_test(enable = "avx512fp16")]
22476 unsafe fn test_mm512_min_ph() {
22477 let a = _mm512_set1_ph(2.0);
22478 let b = _mm512_set1_ph(1.0);
22479 let r = _mm512_min_ph(a, b);
22480 let e = _mm512_set1_ph(1.0);
22481 assert_eq_m512h(r, e);
22482 }
22483
22484 #[simd_test(enable = "avx512fp16")]
22485 unsafe fn test_mm512_mask_min_ph() {
22486 let a = _mm512_set1_ph(2.0);
22487 let b = _mm512_set1_ph(1.0);
22488 let src = _mm512_set1_ph(3.0);
22489 let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
22490 let e = _mm512_set_ph(
22491 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
22492 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22493 );
22494 assert_eq_m512h(r, e);
22495 }
22496
22497 #[simd_test(enable = "avx512fp16")]
22498 unsafe fn test_mm512_maskz_min_ph() {
22499 let a = _mm512_set1_ph(2.0);
22500 let b = _mm512_set1_ph(1.0);
22501 let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
22502 let e = _mm512_set_ph(
22503 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22504 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22505 );
22506 assert_eq_m512h(r, e);
22507 }
22508
22509 #[simd_test(enable = "avx512fp16")]
22510 unsafe fn test_mm512_min_round_ph() {
22511 let a = _mm512_set1_ph(2.0);
22512 let b = _mm512_set1_ph(1.0);
22513 let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22514 let e = _mm512_set1_ph(1.0);
22515 assert_eq_m512h(r, e);
22516 }
22517
22518 #[simd_test(enable = "avx512fp16")]
22519 unsafe fn test_mm512_mask_min_round_ph() {
22520 let a = _mm512_set1_ph(2.0);
22521 let b = _mm512_set1_ph(1.0);
22522 let src = _mm512_set1_ph(3.0);
22523 let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22524 src,
22525 0b01010101010101010101010101010101,
22526 a,
22527 b,
22528 );
22529 let e = _mm512_set_ph(
22530 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
22531 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
22532 );
22533 assert_eq_m512h(r, e);
22534 }
22535
22536 #[simd_test(enable = "avx512fp16")]
22537 unsafe fn test_mm512_maskz_min_round_ph() {
22538 let a = _mm512_set1_ph(2.0);
22539 let b = _mm512_set1_ph(1.0);
22540 let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22541 0b01010101010101010101010101010101,
22542 a,
22543 b,
22544 );
22545 let e = _mm512_set_ph(
22546 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22547 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22548 );
22549 assert_eq_m512h(r, e);
22550 }
22551
22552 #[simd_test(enable = "avx512fp16")]
22553 unsafe fn test_mm_min_sh() {
22554 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22555 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22556 let r = _mm_min_sh(a, b);
22557 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22558 assert_eq_m128h(r, e);
22559 }
22560
22561 #[simd_test(enable = "avx512fp16")]
22562 unsafe fn test_mm_mask_min_sh() {
22563 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22564 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22565 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22566 let r = _mm_mask_min_sh(src, 0, a, b);
22567 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22568 assert_eq_m128h(r, e);
22569 let r = _mm_mask_min_sh(src, 1, a, b);
22570 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22571 assert_eq_m128h(r, e);
22572 }
22573
22574 #[simd_test(enable = "avx512fp16")]
22575 unsafe fn test_mm_maskz_min_sh() {
22576 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22577 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22578 let r = _mm_maskz_min_sh(0, a, b);
22579 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22580 assert_eq_m128h(r, e);
22581 let r = _mm_maskz_min_sh(1, a, b);
22582 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22583 assert_eq_m128h(r, e);
22584 }
22585
22586 #[simd_test(enable = "avx512fp16")]
22587 unsafe fn test_mm_min_round_sh() {
22588 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22589 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22590 let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
22591 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22592 assert_eq_m128h(r, e);
22593 }
22594
22595 #[simd_test(enable = "avx512fp16")]
22596 unsafe fn test_mm_mask_min_round_sh() {
22597 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22598 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22599 let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
22600 let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22601 src, 0, a, b,
22602 );
22603 let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22604 assert_eq_m128h(r, e);
22605 let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
22606 src, 1, a, b,
22607 );
22608 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22609 assert_eq_m128h(r, e);
22610 }
22611
22612 #[simd_test(enable = "avx512fp16")]
22613 unsafe fn test_mm_maskz_min_round_sh() {
22614 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22615 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
22616 let r =
22617 _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
22618 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22619 assert_eq_m128h(r, e);
22620 let r =
22621 _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
22622 let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
22623 assert_eq_m128h(r, e);
22624 }
22625
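    // getexp extracts floor(log2(|x|)) as a floating-point value, so
    // getexp(3.0) = 1.0.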
22626 #[simd_test(enable = "avx512fp16,avx512vl")]
22627 unsafe fn test_mm_getexp_ph() {
22628 let a = _mm_set1_ph(3.0);
22629 let r = _mm_getexp_ph(a);
22630 let e = _mm_set1_ph(1.0);
22631 assert_eq_m128h(r, e);
22632 }
22633
22634 #[simd_test(enable = "avx512fp16,avx512vl")]
22635 unsafe fn test_mm_mask_getexp_ph() {
22636 let a = _mm_set1_ph(3.0);
22637 let src = _mm_set1_ph(4.0);
22638 let r = _mm_mask_getexp_ph(src, 0b01010101, a);
22639 let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
22640 assert_eq_m128h(r, e);
22641 }
22642
22643 #[simd_test(enable = "avx512fp16,avx512vl")]
22644 unsafe fn test_mm_maskz_getexp_ph() {
22645 let a = _mm_set1_ph(3.0);
22646 let r = _mm_maskz_getexp_ph(0b01010101, a);
22647 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
22648 assert_eq_m128h(r, e);
22649 }
22650
22651 #[simd_test(enable = "avx512fp16,avx512vl")]
22652 unsafe fn test_mm256_getexp_ph() {
22653 let a = _mm256_set1_ph(3.0);
22654 let r = _mm256_getexp_ph(a);
22655 let e = _mm256_set1_ph(1.0);
22656 assert_eq_m256h(r, e);
22657 }
22658
22659 #[simd_test(enable = "avx512fp16,avx512vl")]
22660 unsafe fn test_mm256_mask_getexp_ph() {
22661 let a = _mm256_set1_ph(3.0);
22662 let src = _mm256_set1_ph(4.0);
22663 let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
22664 let e = _mm256_set_ph(
22665 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22666 );
22667 assert_eq_m256h(r, e);
22668 }
22669
22670 #[simd_test(enable = "avx512fp16,avx512vl")]
22671 unsafe fn test_mm256_maskz_getexp_ph() {
22672 let a = _mm256_set1_ph(3.0);
22673 let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
22674 let e = _mm256_set_ph(
22675 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22676 );
22677 assert_eq_m256h(r, e);
22678 }
22679
22680 #[simd_test(enable = "avx512fp16")]
22681 unsafe fn test_mm512_getexp_ph() {
22682 let a = _mm512_set1_ph(3.0);
22683 let r = _mm512_getexp_ph(a);
22684 let e = _mm512_set1_ph(1.0);
22685 assert_eq_m512h(r, e);
22686 }
22687
22688 #[simd_test(enable = "avx512fp16")]
22689 unsafe fn test_mm512_mask_getexp_ph() {
22690 let a = _mm512_set1_ph(3.0);
22691 let src = _mm512_set1_ph(4.0);
22692 let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
22693 let e = _mm512_set_ph(
22694 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
22695 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22696 );
22697 assert_eq_m512h(r, e);
22698 }
22699
22700 #[simd_test(enable = "avx512fp16")]
22701 unsafe fn test_mm512_maskz_getexp_ph() {
22702 let a = _mm512_set1_ph(3.0);
22703 let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
22704 let e = _mm512_set_ph(
22705 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22706 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22707 );
22708 assert_eq_m512h(r, e);
22709 }
22710
22711 #[simd_test(enable = "avx512fp16")]
22712 unsafe fn test_mm512_getexp_round_ph() {
22713 let a = _mm512_set1_ph(3.0);
22714 let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
22715 let e = _mm512_set1_ph(1.0);
22716 assert_eq_m512h(r, e);
22717 }
22718
22719 #[simd_test(enable = "avx512fp16")]
22720 unsafe fn test_mm512_mask_getexp_round_ph() {
22721 let a = _mm512_set1_ph(3.0);
22722 let src = _mm512_set1_ph(4.0);
22723 let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22724 src,
22725 0b01010101010101010101010101010101,
22726 a,
22727 );
22728 let e = _mm512_set_ph(
22729 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
22730 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
22731 );
22732 assert_eq_m512h(r, e);
22733 }
22734
22735 #[simd_test(enable = "avx512fp16")]
22736 unsafe fn test_mm512_maskz_getexp_round_ph() {
22737 let a = _mm512_set1_ph(3.0);
22738 let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22739 0b01010101010101010101010101010101,
22740 a,
22741 );
22742 let e = _mm512_set_ph(
22743 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22744 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22745 );
22746 assert_eq_m512h(r, e);
22747 }
22748
22749 #[simd_test(enable = "avx512fp16")]
22750 unsafe fn test_mm_getexp_sh() {
22751 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22752 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22753 let r = _mm_getexp_sh(a, b);
22754 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22755 assert_eq_m128h(r, e);
22756 }
22757
22758 #[simd_test(enable = "avx512fp16")]
22759 unsafe fn test_mm_mask_getexp_sh() {
22760 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22761 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22762 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
22763 let r = _mm_mask_getexp_sh(src, 0, a, b);
22764 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22765 assert_eq_m128h(r, e);
22766 let r = _mm_mask_getexp_sh(src, 1, a, b);
22767 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22768 assert_eq_m128h(r, e);
22769 }
22770
22771 #[simd_test(enable = "avx512fp16")]
22772 unsafe fn test_mm_maskz_getexp_sh() {
22773 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22774 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22775 let r = _mm_maskz_getexp_sh(0, a, b);
22776 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22777 assert_eq_m128h(r, e);
22778 let r = _mm_maskz_getexp_sh(1, a, b);
22779 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22780 assert_eq_m128h(r, e);
22781 }
22782
22783 #[simd_test(enable = "avx512fp16")]
22784 unsafe fn test_mm_getexp_round_sh() {
22785 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22786 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22787 let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
22788 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22789 assert_eq_m128h(r, e);
22790 }
22791
22792 #[simd_test(enable = "avx512fp16")]
22793 unsafe fn test_mm_mask_getexp_round_sh() {
22794 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22795 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22796 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
22797 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
22798 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22799 assert_eq_m128h(r, e);
22800 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
22801 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22802 assert_eq_m128h(r, e);
22803 }
22804
22805 #[simd_test(enable = "avx512fp16")]
22806 unsafe fn test_mm_maskz_getexp_round_sh() {
22807 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22808 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22809 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
22810 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22811 assert_eq_m128h(r, e);
22812 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
22813 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22814 assert_eq_m128h(r, e);
22815 }
22816
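    // getmant with `_MM_MANT_NORM_P75_1P5` normalizes the mantissa into
    // [0.75, 1.5); 10.0 = 1.25 * 2^3, so the expected mantissa is 1.25. The
    // `_MM_MANT_SIGN_NAN` sign control only affects negative inputs and is
    // irrelevant here.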
22817 #[simd_test(enable = "avx512fp16,avx512vl")]
22818 unsafe fn test_mm_getmant_ph() {
22819 let a = _mm_set1_ph(10.0);
22820 let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22821 let e = _mm_set1_ph(1.25);
22822 assert_eq_m128h(r, e);
22823 }
22824
22825 #[simd_test(enable = "avx512fp16,avx512vl")]
22826 unsafe fn test_mm_mask_getmant_ph() {
22827 let a = _mm_set1_ph(10.0);
22828 let src = _mm_set1_ph(20.0);
22829 let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
22830 let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
22831 assert_eq_m128h(r, e);
22832 }
22833
22834 #[simd_test(enable = "avx512fp16,avx512vl")]
22835 unsafe fn test_mm_maskz_getmant_ph() {
22836 let a = _mm_set1_ph(10.0);
22837 let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
22838 let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
22839 assert_eq_m128h(r, e);
22840 }
22841
22842 #[simd_test(enable = "avx512fp16,avx512vl")]
22843 unsafe fn test_mm256_getmant_ph() {
22844 let a = _mm256_set1_ph(10.0);
22845 let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22846 let e = _mm256_set1_ph(1.25);
22847 assert_eq_m256h(r, e);
22848 }
22849
22850 #[simd_test(enable = "avx512fp16,avx512vl")]
22851 unsafe fn test_mm256_mask_getmant_ph() {
22852 let a = _mm256_set1_ph(10.0);
22853 let src = _mm256_set1_ph(20.0);
22854 let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22855 src,
22856 0b0101010101010101,
22857 a,
22858 );
22859 let e = _mm256_set_ph(
22860 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22861 20.0, 1.25,
22862 );
22863 assert_eq_m256h(r, e);
22864 }
22865
22866 #[simd_test(enable = "avx512fp16,avx512vl")]
22867 unsafe fn test_mm256_maskz_getmant_ph() {
22868 let a = _mm256_set1_ph(10.0);
22869 let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22870 0b0101010101010101,
22871 a,
22872 );
22873 let e = _mm256_set_ph(
22874 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22875 );
22876 assert_eq_m256h(r, e);
22877 }
22878
22879 #[simd_test(enable = "avx512fp16")]
22880 unsafe fn test_mm512_getmant_ph() {
22881 let a = _mm512_set1_ph(10.0);
22882 let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22883 let e = _mm512_set1_ph(1.25);
22884 assert_eq_m512h(r, e);
22885 }
22886
22887 #[simd_test(enable = "avx512fp16")]
22888 unsafe fn test_mm512_mask_getmant_ph() {
22889 let a = _mm512_set1_ph(10.0);
22890 let src = _mm512_set1_ph(20.0);
22891 let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22892 src,
22893 0b01010101010101010101010101010101,
22894 a,
22895 );
22896 let e = _mm512_set_ph(
22897 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22898 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22899 20.0, 1.25, 20.0, 1.25,
22900 );
22901 assert_eq_m512h(r, e);
22902 }
22903
22904 #[simd_test(enable = "avx512fp16")]
22905 unsafe fn test_mm512_maskz_getmant_ph() {
22906 let a = _mm512_set1_ph(10.0);
22907 let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22908 0b01010101010101010101010101010101,
22909 a,
22910 );
22911 let e = _mm512_set_ph(
22912 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22913 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22914 );
22915 assert_eq_m512h(r, e);
22916 }
22917
22918 #[simd_test(enable = "avx512fp16")]
22919 unsafe fn test_mm512_getmant_round_ph() {
22920 let a = _mm512_set1_ph(10.0);
22921 let r =
22922 _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
22923 a,
22924 );
22925 let e = _mm512_set1_ph(1.25);
22926 assert_eq_m512h(r, e);
22927 }
22928
22929 #[simd_test(enable = "avx512fp16")]
22930 unsafe fn test_mm512_mask_getmant_round_ph() {
22931 let a = _mm512_set1_ph(10.0);
22932 let src = _mm512_set1_ph(20.0);
22933 let r = _mm512_mask_getmant_round_ph::<
22934 _MM_MANT_NORM_P75_1P5,
22935 _MM_MANT_SIGN_NAN,
22936 _MM_FROUND_NO_EXC,
22937 >(src, 0b01010101010101010101010101010101, a);
22938 let e = _mm512_set_ph(
22939 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22940 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22941 20.0, 1.25, 20.0, 1.25,
22942 );
22943 assert_eq_m512h(r, e);
22944 }
22945
22946 #[simd_test(enable = "avx512fp16")]
22947 unsafe fn test_mm512_maskz_getmant_round_ph() {
22948 let a = _mm512_set1_ph(10.0);
22949 let r = _mm512_maskz_getmant_round_ph::<
22950 _MM_MANT_NORM_P75_1P5,
22951 _MM_MANT_SIGN_NAN,
22952 _MM_FROUND_NO_EXC,
22953 >(0b01010101010101010101010101010101, a);
22954 let e = _mm512_set_ph(
22955 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22956 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22957 );
22958 assert_eq_m512h(r, e);
22959 }
22960
22961 #[simd_test(enable = "avx512fp16")]
22962 unsafe fn test_mm_getmant_sh() {
22963 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22964 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22965 let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
22966 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
22967 assert_eq_m128h(r, e);
22968 }
22969
22970 #[simd_test(enable = "avx512fp16")]
22971 unsafe fn test_mm_mask_getmant_sh() {
22972 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22973 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22974 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
22975 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
22976 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
22977 assert_eq_m128h(r, e);
22978 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
22979 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
22980 assert_eq_m128h(r, e);
22981 }
22982
22983 #[simd_test(enable = "avx512fp16")]
22984 unsafe fn test_mm_maskz_getmant_sh() {
22985 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22986 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22987 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
22988 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22989 assert_eq_m128h(r, e);
22990 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
22991 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
22992 assert_eq_m128h(r, e);
22993 }
22994
22995 #[simd_test(enable = "avx512fp16")]
22996 unsafe fn test_mm_getmant_round_sh() {
22997 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
22998 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
22999 let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23000 a, b,
23001 );
23002 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23003 assert_eq_m128h(r, e);
23004 }
23005
23006 #[simd_test(enable = "avx512fp16")]
23007 unsafe fn test_mm_mask_getmant_round_sh() {
23008 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23009 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23010 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
23011 let r = _mm_mask_getmant_round_sh::<
23012 _MM_MANT_NORM_P75_1P5,
23013 _MM_MANT_SIGN_NAN,
23014 _MM_FROUND_NO_EXC,
23015 >(src, 0, a, b);
23016 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
23017 assert_eq_m128h(r, e);
23018 let r = _mm_mask_getmant_round_sh::<
23019 _MM_MANT_NORM_P75_1P5,
23020 _MM_MANT_SIGN_NAN,
23021 _MM_FROUND_NO_EXC,
23022 >(src, 1, a, b);
23023 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23024 assert_eq_m128h(r, e);
23025 }
23026
23027 #[simd_test(enable = "avx512fp16")]
23028 unsafe fn test_mm_maskz_getmant_round_sh() {
23029 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23030 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23031 let r = _mm_maskz_getmant_round_sh::<
23032 _MM_MANT_NORM_P75_1P5,
23033 _MM_MANT_SIGN_NAN,
23034 _MM_FROUND_NO_EXC,
23035 >(0, a, b);
23036 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23037 assert_eq_m128h(r, e);
23038 let r = _mm_maskz_getmant_round_sh::<
23039 _MM_MANT_NORM_P75_1P5,
23040 _MM_MANT_SIGN_NAN,
23041 _MM_FROUND_NO_EXC,
23042 >(1, a, b);
23043 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23044 assert_eq_m128h(r, e);
23045 }
23046
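    // roundscale with imm8 = 0 rounds to the nearest integer (zero fraction bits
    // kept), so 1.1 becomes 1.0.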
23047 #[simd_test(enable = "avx512fp16,avx512vl")]
23048 unsafe fn test_mm_roundscale_ph() {
23049 let a = _mm_set1_ph(1.1);
23050 let r = _mm_roundscale_ph::<0>(a);
23051 let e = _mm_set1_ph(1.0);
23052 assert_eq_m128h(r, e);
23053 }
23054
23055 #[simd_test(enable = "avx512fp16,avx512vl")]
23056 unsafe fn test_mm_mask_roundscale_ph() {
23057 let a = _mm_set1_ph(1.1);
23058 let src = _mm_set1_ph(2.0);
23059 let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
23060 let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
23061 assert_eq_m128h(r, e);
23062 }
23063
23064 #[simd_test(enable = "avx512fp16,avx512vl")]
23065 unsafe fn test_mm_maskz_roundscale_ph() {
23066 let a = _mm_set1_ph(1.1);
23067 let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
23068 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
23069 assert_eq_m128h(r, e);
23070 }
23071
23072 #[simd_test(enable = "avx512fp16,avx512vl")]
23073 unsafe fn test_mm256_roundscale_ph() {
23074 let a = _mm256_set1_ph(1.1);
23075 let r = _mm256_roundscale_ph::<0>(a);
23076 let e = _mm256_set1_ph(1.0);
23077 assert_eq_m256h(r, e);
23078 }
23079
23080 #[simd_test(enable = "avx512fp16,avx512vl")]
23081 unsafe fn test_mm256_mask_roundscale_ph() {
23082 let a = _mm256_set1_ph(1.1);
23083 let src = _mm256_set1_ph(2.0);
23084 let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
23085 let e = _mm256_set_ph(
23086 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23087 );
23088 assert_eq_m256h(r, e);
23089 }
23090
23091 #[simd_test(enable = "avx512fp16,avx512vl")]
23092 unsafe fn test_mm256_maskz_roundscale_ph() {
23093 let a = _mm256_set1_ph(1.1);
23094 let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
23095 let e = _mm256_set_ph(
23096 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23097 );
23098 assert_eq_m256h(r, e);
23099 }
23100
23101 #[simd_test(enable = "avx512fp16")]
23102 unsafe fn test_mm512_roundscale_ph() {
23103 let a = _mm512_set1_ph(1.1);
23104 let r = _mm512_roundscale_ph::<0>(a);
23105 let e = _mm512_set1_ph(1.0);
23106 assert_eq_m512h(r, e);
23107 }
23108
23109 #[simd_test(enable = "avx512fp16")]
23110 unsafe fn test_mm512_mask_roundscale_ph() {
23111 let a = _mm512_set1_ph(1.1);
23112 let src = _mm512_set1_ph(2.0);
23113 let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
23114 let e = _mm512_set_ph(
23115 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23116 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23117 );
23118 assert_eq_m512h(r, e);
23119 }
23120
23121 #[simd_test(enable = "avx512fp16")]
23122 unsafe fn test_mm512_maskz_roundscale_ph() {
23123 let a = _mm512_set1_ph(1.1);
23124 let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
23125 let e = _mm512_set_ph(
23126 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23127 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23128 );
23129 assert_eq_m512h(r, e);
23130 }
23131
23132 #[simd_test(enable = "avx512fp16")]
23133 unsafe fn test_mm512_roundscale_round_ph() {
23134 let a = _mm512_set1_ph(1.1);
23135 let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
23136 let e = _mm512_set1_ph(1.0);
23137 assert_eq_m512h(r, e);
23138 }
23139
23140 #[simd_test(enable = "avx512fp16")]
23141 unsafe fn test_mm512_mask_roundscale_round_ph() {
23142 let a = _mm512_set1_ph(1.1);
23143 let src = _mm512_set1_ph(2.0);
23144 let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23145 src,
23146 0b01010101010101010101010101010101,
23147 a,
23148 );
23149 let e = _mm512_set_ph(
23150 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23151 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23152 );
23153 assert_eq_m512h(r, e);
23154 }
23155
23156 #[simd_test(enable = "avx512fp16")]
23157 unsafe fn test_mm512_maskz_roundscale_round_ph() {
23158 let a = _mm512_set1_ph(1.1);
23159 let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23160 0b01010101010101010101010101010101,
23161 a,
23162 );
23163 let e = _mm512_set_ph(
23164 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23165 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23166 );
23167 assert_eq_m512h(r, e);
23168 }
23169
23170 #[simd_test(enable = "avx512fp16")]
23171 unsafe fn test_mm_roundscale_sh() {
23172 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23173 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23174 let r = _mm_roundscale_sh::<0>(a, b);
23175 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23176 assert_eq_m128h(r, e);
23177 }
23178
23179 #[simd_test(enable = "avx512fp16")]
23180 unsafe fn test_mm_mask_roundscale_sh() {
23181 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23182 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23183 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23184 let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
23185 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23186 assert_eq_m128h(r, e);
23187 let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
23188 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23189 assert_eq_m128h(r, e);
23190 }
23191
23192 #[simd_test(enable = "avx512fp16")]
23193 unsafe fn test_mm_maskz_roundscale_sh() {
23194 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23195 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23196 let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
23197 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23198 assert_eq_m128h(r, e);
23199 let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
23200 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23201 assert_eq_m128h(r, e);
23202 }
23203
23204 #[simd_test(enable = "avx512fp16")]
23205 unsafe fn test_mm_roundscale_round_sh() {
23206 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23207 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23208 let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
23209 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23210 assert_eq_m128h(r, e);
23211 }
23212
23213 #[simd_test(enable = "avx512fp16")]
23214 unsafe fn test_mm_mask_roundscale_round_sh() {
23215 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23216 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23217 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23218 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
23219 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23220 assert_eq_m128h(r, e);
23221 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
23222 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23223 assert_eq_m128h(r, e);
23224 }
23225
23226 #[simd_test(enable = "avx512fp16")]
23227 unsafe fn test_mm_maskz_roundscale_round_sh() {
23228 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23229 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23230 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
23231 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23232 assert_eq_m128h(r, e);
23233 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
23234 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23235 assert_eq_m128h(r, e);
23236 }
23237
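    // scalef computes a * 2^floor(b) per lane (VSCALEFPH/VSCALEFSH), so scaling 1.0 by 3.0
    // yields 8.0 in every selected lane.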
23238 #[simd_test(enable = "avx512fp16,avx512vl")]
23239 unsafe fn test_mm_scalef_ph() {
23240 let a = _mm_set1_ph(1.);
23241 let b = _mm_set1_ph(3.);
23242 let r = _mm_scalef_ph(a, b);
23243 let e = _mm_set1_ph(8.0);
23244 assert_eq_m128h(r, e);
23245 }
23246
23247 #[simd_test(enable = "avx512fp16,avx512vl")]
23248 unsafe fn test_mm_mask_scalef_ph() {
23249 let a = _mm_set1_ph(1.);
23250 let b = _mm_set1_ph(3.);
23251 let src = _mm_set1_ph(2.);
23252 let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
23253 let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
23254 assert_eq_m128h(r, e);
23255 }
23256
23257 #[simd_test(enable = "avx512fp16,avx512vl")]
23258 unsafe fn test_mm_maskz_scalef_ph() {
23259 let a = _mm_set1_ph(1.);
23260 let b = _mm_set1_ph(3.);
23261 let r = _mm_maskz_scalef_ph(0b01010101, a, b);
23262 let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
23263 assert_eq_m128h(r, e);
23264 }
23265
23266 #[simd_test(enable = "avx512fp16,avx512vl")]
23267 unsafe fn test_mm256_scalef_ph() {
23268 let a = _mm256_set1_ph(1.);
23269 let b = _mm256_set1_ph(3.);
23270 let r = _mm256_scalef_ph(a, b);
23271 let e = _mm256_set1_ph(8.0);
23272 assert_eq_m256h(r, e);
23273 }
23274
23275 #[simd_test(enable = "avx512fp16,avx512vl")]
23276 unsafe fn test_mm256_mask_scalef_ph() {
23277 let a = _mm256_set1_ph(1.);
23278 let b = _mm256_set1_ph(3.);
23279 let src = _mm256_set1_ph(2.);
23280 let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
23281 let e = _mm256_set_ph(
23282 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23283 );
23284 assert_eq_m256h(r, e);
23285 }
23286
23287 #[simd_test(enable = "avx512fp16,avx512vl")]
23288 unsafe fn test_mm256_maskz_scalef_ph() {
23289 let a = _mm256_set1_ph(1.);
23290 let b = _mm256_set1_ph(3.);
23291 let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
23292 let e = _mm256_set_ph(
23293 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23294 );
23295 assert_eq_m256h(r, e);
23296 }
23297
23298 #[simd_test(enable = "avx512fp16")]
23299 unsafe fn test_mm512_scalef_ph() {
23300 let a = _mm512_set1_ph(1.);
23301 let b = _mm512_set1_ph(3.);
23302 let r = _mm512_scalef_ph(a, b);
23303 let e = _mm512_set1_ph(8.0);
23304 assert_eq_m512h(r, e);
23305 }
23306
23307 #[simd_test(enable = "avx512fp16")]
23308 unsafe fn test_mm512_mask_scalef_ph() {
23309 let a = _mm512_set1_ph(1.);
23310 let b = _mm512_set1_ph(3.);
23311 let src = _mm512_set1_ph(2.);
23312 let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
23313 let e = _mm512_set_ph(
23314 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23315 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23316 );
23317 assert_eq_m512h(r, e);
23318 }
23319
23320 #[simd_test(enable = "avx512fp16")]
23321 unsafe fn test_mm512_maskz_scalef_ph() {
23322 let a = _mm512_set1_ph(1.);
23323 let b = _mm512_set1_ph(3.);
23324 let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
23325 let e = _mm512_set_ph(
23326 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23327 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23328 );
23329 assert_eq_m512h(r, e);
23330 }
23331
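    // The *_round_* variants take an explicit rounding control; _MM_FROUND_TO_NEAREST_INT |
    // _MM_FROUND_NO_EXC selects round-to-nearest-even with floating-point exceptions suppressed.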
23332 #[simd_test(enable = "avx512fp16")]
23333 unsafe fn test_mm512_scalef_round_ph() {
23334 let a = _mm512_set1_ph(1.);
23335 let b = _mm512_set1_ph(3.);
23336 let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23337 let e = _mm512_set1_ph(8.0);
23338 assert_eq_m512h(r, e);
23339 }
23340
23341 #[simd_test(enable = "avx512fp16")]
23342 unsafe fn test_mm512_mask_scalef_round_ph() {
23343 let a = _mm512_set1_ph(1.);
23344 let b = _mm512_set1_ph(3.);
23345 let src = _mm512_set1_ph(2.);
23346 let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23347 src,
23348 0b01010101010101010101010101010101,
23349 a,
23350 b,
23351 );
23352 let e = _mm512_set_ph(
23353 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23354 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23355 );
23356 assert_eq_m512h(r, e);
23357 }
23358
23359 #[simd_test(enable = "avx512fp16")]
23360 unsafe fn test_mm512_maskz_scalef_round_ph() {
23361 let a = _mm512_set1_ph(1.);
23362 let b = _mm512_set1_ph(3.);
23363 let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23364 0b01010101010101010101010101010101,
23365 a,
23366 b,
23367 );
23368 let e = _mm512_set_ph(
23369 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23370 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23371 );
23372 assert_eq_m512h(r, e);
23373 }
23374
23375 #[simd_test(enable = "avx512fp16")]
23376 unsafe fn test_mm_scalef_sh() {
23377 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23378 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23379 let r = _mm_scalef_sh(a, b);
23380 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23381 assert_eq_m128h(r, e);
23382 }
23383
23384 #[simd_test(enable = "avx512fp16")]
23385 unsafe fn test_mm_mask_scalef_sh() {
23386 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23387 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23388 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23389 let r = _mm_mask_scalef_sh(src, 0, a, b);
23390 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23391 assert_eq_m128h(r, e);
23392 let r = _mm_mask_scalef_sh(src, 1, a, b);
23393 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23394 assert_eq_m128h(r, e);
23395 }
23396
23397 #[simd_test(enable = "avx512fp16")]
23398 unsafe fn test_mm_maskz_scalef_sh() {
23399 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23400 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23401 let r = _mm_maskz_scalef_sh(0, a, b);
23402 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23403 assert_eq_m128h(r, e);
23404 let r = _mm_maskz_scalef_sh(1, a, b);
23405 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23406 assert_eq_m128h(r, e);
23407 }
23408
23409 #[simd_test(enable = "avx512fp16")]
23410 unsafe fn test_mm_scalef_round_sh() {
23411 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23412 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23413 let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23414 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23415 assert_eq_m128h(r, e);
23416 }
23417
23418 #[simd_test(enable = "avx512fp16")]
23419 unsafe fn test_mm_mask_scalef_round_sh() {
23420 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23421 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23422 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23423 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23424 src, 0, a, b,
23425 );
23426 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23427 assert_eq_m128h(r, e);
23428 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23429 src, 1, a, b,
23430 );
23431 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23432 assert_eq_m128h(r, e);
23433 }
23434
23435 #[simd_test(enable = "avx512fp16")]
23436 unsafe fn test_mm_maskz_scalef_round_sh() {
23437 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23438 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23439 let r =
23440 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
23441 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23442 assert_eq_m128h(r, e);
23443 let r =
23444 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
23445 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23446 assert_eq_m128h(r, e);
23447 }
23448
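    // reduce_ph returns the reduced argument a - round(a * 2^M) / 2^M with M = IMM8[7:4];
    // IMM8 = 16 | _MM_FROUND_TO_ZERO gives M = 1 with truncation, so 1.25 reduces to 0.25.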
23449 #[simd_test(enable = "avx512fp16,avx512vl")]
23450 unsafe fn test_mm_reduce_ph() {
23451 let a = _mm_set1_ph(1.25);
23452 let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23453 let e = _mm_set1_ph(0.25);
23454 assert_eq_m128h(r, e);
23455 }
23456
23457 #[simd_test(enable = "avx512fp16,avx512vl")]
23458 unsafe fn test_mm_mask_reduce_ph() {
23459 let a = _mm_set1_ph(1.25);
23460 let src = _mm_set1_ph(2.0);
23461 let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
23462 let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
23463 assert_eq_m128h(r, e);
23464 }
23465
23466 #[simd_test(enable = "avx512fp16,avx512vl")]
23467 unsafe fn test_mm_maskz_reduce_ph() {
23468 let a = _mm_set1_ph(1.25);
23469 let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
23470 let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
23471 assert_eq_m128h(r, e);
23472 }
23473
23474 #[simd_test(enable = "avx512fp16,avx512vl")]
23475 unsafe fn test_mm256_reduce_ph() {
23476 let a = _mm256_set1_ph(1.25);
23477 let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23478 let e = _mm256_set1_ph(0.25);
23479 assert_eq_m256h(r, e);
23480 }
23481
23482 #[simd_test(enable = "avx512fp16,avx512vl")]
23483 unsafe fn test_mm256_mask_reduce_ph() {
23484 let a = _mm256_set1_ph(1.25);
23485 let src = _mm256_set1_ph(2.0);
23486 let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
23487 let e = _mm256_set_ph(
23488 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23489 );
23490 assert_eq_m256h(r, e);
23491 }
23492
23493 #[simd_test(enable = "avx512fp16,avx512vl")]
23494 unsafe fn test_mm256_maskz_reduce_ph() {
23495 let a = _mm256_set1_ph(1.25);
23496 let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
23497 let e = _mm256_set_ph(
23498 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23499 );
23500 assert_eq_m256h(r, e);
23501 }
23502
23503 #[simd_test(enable = "avx512fp16")]
23504 unsafe fn test_mm512_reduce_ph() {
23505 let a = _mm512_set1_ph(1.25);
23506 let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23507 let e = _mm512_set1_ph(0.25);
23508 assert_eq_m512h(r, e);
23509 }
23510
23511 #[simd_test(enable = "avx512fp16")]
23512 unsafe fn test_mm512_mask_reduce_ph() {
23513 let a = _mm512_set1_ph(1.25);
23514 let src = _mm512_set1_ph(2.0);
23515 let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
23516 src,
23517 0b01010101010101010101010101010101,
23518 a,
23519 );
23520 let e = _mm512_set_ph(
23521 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23522 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23523 );
23524 assert_eq_m512h(r, e);
23525 }
23526
23527 #[simd_test(enable = "avx512fp16")]
23528 unsafe fn test_mm512_maskz_reduce_ph() {
23529 let a = _mm512_set1_ph(1.25);
23530 let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
23531 0b01010101010101010101010101010101,
23532 a,
23533 );
23534 let e = _mm512_set_ph(
23535 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23536 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23537 );
23538 assert_eq_m512h(r, e);
23539 }
23540
23541 #[simd_test(enable = "avx512fp16")]
23542 unsafe fn test_mm512_reduce_round_ph() {
23543 let a = _mm512_set1_ph(1.25);
23544 let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
23545 let e = _mm512_set1_ph(0.25);
23546 assert_eq_m512h(r, e);
23547 }
23548
23549 #[simd_test(enable = "avx512fp16")]
23550 unsafe fn test_mm512_mask_reduce_round_ph() {
23551 let a = _mm512_set1_ph(1.25);
23552 let src = _mm512_set1_ph(2.0);
23553 let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23554 src,
23555 0b01010101010101010101010101010101,
23556 a,
23557 );
23558 let e = _mm512_set_ph(
23559 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23560 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23561 );
23562 assert_eq_m512h(r, e);
23563 }
23564
23565 #[simd_test(enable = "avx512fp16")]
23566 unsafe fn test_mm512_maskz_reduce_round_ph() {
23567 let a = _mm512_set1_ph(1.25);
23568 let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23569 0b01010101010101010101010101010101,
23570 a,
23571 );
23572 let e = _mm512_set_ph(
23573 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23574 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
23575 );
23576 assert_eq_m512h(r, e);
23577 }
23578
23579 #[simd_test(enable = "avx512fp16")]
23580 unsafe fn test_mm_reduce_sh() {
23581 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23582 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23583 let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
23584 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23585 assert_eq_m128h(r, e);
23586 }
23587
23588 #[simd_test(enable = "avx512fp16")]
23589 unsafe fn test_mm_mask_reduce_sh() {
23590 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23591 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23592 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23593 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
23594 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23595 assert_eq_m128h(r, e);
23596 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
23597 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23598 assert_eq_m128h(r, e);
23599 }
23600
23601 #[simd_test(enable = "avx512fp16")]
23602 unsafe fn test_mm_maskz_reduce_sh() {
23603 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23604 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23605 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
23606 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23607 assert_eq_m128h(r, e);
23608 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
23609 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23610 assert_eq_m128h(r, e);
23611 }
23612
23613 #[simd_test(enable = "avx512fp16")]
23614 unsafe fn test_mm_reduce_round_sh() {
23615 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23616 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23617 let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
23618 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23619 assert_eq_m128h(r, e);
23620 }
23621
23622 #[simd_test(enable = "avx512fp16")]
23623 unsafe fn test_mm_mask_reduce_round_sh() {
23624 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23625 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23626 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23627 let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23628 src, 0, a, b,
23629 );
23630 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23631 assert_eq_m128h(r, e);
23632 let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
23633 src, 1, a, b,
23634 );
23635 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23636 assert_eq_m128h(r, e);
23637 }
23638
23639 #[simd_test(enable = "avx512fp16")]
23640 unsafe fn test_mm_maskz_reduce_round_sh() {
23641 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23642 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23643 let r =
23644 _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
23645 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23646 assert_eq_m128h(r, e);
23647 let r =
23648 _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
23649 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23650 assert_eq_m128h(r, e);
23651 }
23652
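    // The reduce_{add,mul,max,min}_ph helpers below fold all lanes into a single f16,
    // e.g. eight lanes of 2.0 sum to 16.0 and multiply to 256.0.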
23653 #[simd_test(enable = "avx512fp16,avx512vl")]
23654 unsafe fn test_mm_reduce_add_ph() {
23655 let a = _mm_set1_ph(2.0);
23656 let r = _mm_reduce_add_ph(a);
23657 assert_eq!(r, 16.0);
23658 }
23659
23660 #[simd_test(enable = "avx512fp16,avx512vl")]
23661 unsafe fn test_mm256_reduce_add_ph() {
23662 let a = _mm256_set1_ph(2.0);
23663 let r = _mm256_reduce_add_ph(a);
23664 assert_eq!(r, 32.0);
23665 }
23666
23667 #[simd_test(enable = "avx512fp16")]
23668 unsafe fn test_mm512_reduce_add_ph() {
23669 let a = _mm512_set1_ph(2.0);
23670 let r = _mm512_reduce_add_ph(a);
23671 assert_eq!(r, 64.0);
23672 }
23673
23674 #[simd_test(enable = "avx512fp16,avx512vl")]
23675 unsafe fn test_mm_reduce_mul_ph() {
23676 let a = _mm_set1_ph(2.0);
23677 let r = _mm_reduce_mul_ph(a);
23678 assert_eq!(r, 256.0);
23679 }
23680
23681 #[simd_test(enable = "avx512fp16,avx512vl")]
23682 unsafe fn test_mm256_reduce_mul_ph() {
23683 let a = _mm256_set1_ph(2.0);
23684 let r = _mm256_reduce_mul_ph(a);
23685 assert_eq!(r, 65536.0);
23686 }
23687
23688 #[simd_test(enable = "avx512fp16")]
23689 unsafe fn test_mm512_reduce_mul_ph() {
23690 let a = _mm512_set1_ph(2.0);
23691 let r = _mm512_reduce_mul_ph(a);
23692 assert_eq!(r, 16777216.0);
23693 }
23694
23695 #[simd_test(enable = "avx512fp16,avx512vl")]
23696 unsafe fn test_mm_reduce_max_ph() {
23697 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23698 let r = _mm_reduce_max_ph(a);
23699 assert_eq!(r, 8.0);
23700 }
23701
23702 #[simd_test(enable = "avx512fp16,avx512vl")]
23703 unsafe fn test_mm256_reduce_max_ph() {
23704 let a = _mm256_set_ph(
23705 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23706 );
23707 let r = _mm256_reduce_max_ph(a);
23708 assert_eq!(r, 16.0);
23709 }
23710
23711 #[simd_test(enable = "avx512fp16")]
23712 unsafe fn test_mm512_reduce_max_ph() {
23713 let a = _mm512_set_ph(
23714 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23715 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23716 31.0, 32.0,
23717 );
23718 let r = _mm512_reduce_max_ph(a);
23719 assert_eq!(r, 32.0);
23720 }
23721
23722 #[simd_test(enable = "avx512fp16,avx512vl")]
23723 unsafe fn test_mm_reduce_min_ph() {
23724 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23725 let r = _mm_reduce_min_ph(a);
23726 assert_eq!(r, 1.0);
23727 }
23728
23729 #[simd_test(enable = "avx512fp16,avx512vl")]
23730 unsafe fn test_mm256_reduce_min_ph() {
23731 let a = _mm256_set_ph(
23732 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23733 );
23734 let r = _mm256_reduce_min_ph(a);
23735 assert_eq!(r, 1.0);
23736 }
23737
23738 #[simd_test(enable = "avx512fp16")]
23739 unsafe fn test_mm512_reduce_min_ph() {
23740 let a = _mm512_set_ph(
23741 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23742 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23743 31.0, 32.0,
23744 );
23745 let r = _mm512_reduce_min_ph(a);
23746 assert_eq!(r, 1.0);
23747 }
23748
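    // fpclass tests each lane against the categories selected by the immediate:
    // bit 0x08 matches positive infinity and 0x10 matches negative infinity, so 0x18 flags
    // both infinities.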
23749 #[simd_test(enable = "avx512fp16,avx512vl")]
23750 unsafe fn test_mm_fpclass_ph_mask() {
23751 let a = _mm_set_ph(
23752 1.,
23753 f16::INFINITY,
23754 f16::NEG_INFINITY,
23755 0.0,
23756 -0.0,
23757 -2.0,
23758 f16::NAN,
23759 5.9e-8, // Denormal
23760 );
23761 let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
23762 assert_eq!(r, 0b01100000);
23763 }
23764
23765 #[simd_test(enable = "avx512fp16,avx512vl")]
23766 unsafe fn test_mm_mask_fpclass_ph_mask() {
23767 let a = _mm_set_ph(
23768 1.,
23769 f16::INFINITY,
23770 f16::NEG_INFINITY,
23771 0.0,
23772 -0.0,
23773 -2.0,
23774 f16::NAN,
23775 5.9e-8, // Denormal
23776 );
23777 let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
23778 assert_eq!(r, 0b01000000);
23779 }
23780
23781 #[simd_test(enable = "avx512fp16,avx512vl")]
23782 unsafe fn test_mm256_fpclass_ph_mask() {
23783 let a = _mm256_set_ph(
23784 1.,
23785 f16::INFINITY,
23786 f16::NEG_INFINITY,
23787 0.0,
23788 -0.0,
23789 -2.0,
23790 f16::NAN,
23791 5.9e-8, // Denormal
23792 1.,
23793 f16::INFINITY,
23794 f16::NEG_INFINITY,
23795 0.0,
23796 -0.0,
23797 -2.0,
23798 f16::NAN,
23799 5.9e-8, // Denormal
23800 );
23801 let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
23802 assert_eq!(r, 0b0110000001100000);
23803 }
23804
23805 #[simd_test(enable = "avx512fp16,avx512vl")]
23806 unsafe fn test_mm256_mask_fpclass_ph_mask() {
23807 let a = _mm256_set_ph(
23808 1.,
23809 f16::INFINITY,
23810 f16::NEG_INFINITY,
23811 0.0,
23812 -0.0,
23813 -2.0,
23814 f16::NAN,
23815 5.9e-8, // Denormal
23816 1.,
23817 f16::INFINITY,
23818 f16::NEG_INFINITY,
23819 0.0,
23820 -0.0,
23821 -2.0,
23822 f16::NAN,
23823 5.9e-8, // Denormal
23824 );
23825 let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
23826 assert_eq!(r, 0b0100000001000000);
23827 }
23828
23829 #[simd_test(enable = "avx512fp16")]
23830 unsafe fn test_mm512_fpclass_ph_mask() {
23831 let a = _mm512_set_ph(
23832 1.,
23833 f16::INFINITY,
23834 f16::NEG_INFINITY,
23835 0.0,
23836 -0.0,
23837 -2.0,
23838 f16::NAN,
23839 5.9e-8, // Denormal
23840 1.,
23841 f16::INFINITY,
23842 f16::NEG_INFINITY,
23843 0.0,
23844 -0.0,
23845 -2.0,
23846 f16::NAN,
23847 5.9e-8, // Denormal
23848 1.,
23849 f16::INFINITY,
23850 f16::NEG_INFINITY,
23851 0.0,
23852 -0.0,
23853 -2.0,
23854 f16::NAN,
23855 5.9e-8, // Denormal
23856 1.,
23857 f16::INFINITY,
23858 f16::NEG_INFINITY,
23859 0.0,
23860 -0.0,
23861 -2.0,
23862 f16::NAN,
23863 5.9e-8, // Denormal
23864 );
23865 let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
23866 assert_eq!(r, 0b01100000011000000110000001100000);
23867 }
23868
23869 #[simd_test(enable = "avx512fp16")]
23870 unsafe fn test_mm512_mask_fpclass_ph_mask() {
23871 let a = _mm512_set_ph(
23872 1.,
23873 f16::INFINITY,
23874 f16::NEG_INFINITY,
23875 0.0,
23876 -0.0,
23877 -2.0,
23878 f16::NAN,
23879 5.9e-8, // Denormal
23880 1.,
23881 f16::INFINITY,
23882 f16::NEG_INFINITY,
23883 0.0,
23884 -0.0,
23885 -2.0,
23886 f16::NAN,
23887 5.9e-8, // Denormal
23888 1.,
23889 f16::INFINITY,
23890 f16::NEG_INFINITY,
23891 0.0,
23892 -0.0,
23893 -2.0,
23894 f16::NAN,
23895 5.9e-8, // Denormal
23896 1.,
23897 f16::INFINITY,
23898 f16::NEG_INFINITY,
23899 0.0,
23900 -0.0,
23901 -2.0,
23902 f16::NAN,
23903 5.9e-8, // Denormal
23904 );
23905 let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
23906 assert_eq!(r, 0b01000000010000000100000001000000);
23907 }
23908
23909 #[simd_test(enable = "avx512fp16")]
23910 unsafe fn test_mm_fpclass_sh_mask() {
23911 let a = _mm_set_sh(f16::INFINITY);
23912 let r = _mm_fpclass_sh_mask::<0x18>(a);
23913 assert_eq!(r, 1);
23914 }
23915
23916 #[simd_test(enable = "avx512fp16")]
23917 unsafe fn test_mm_mask_fpclass_sh_mask() {
23918 let a = _mm_set_sh(f16::INFINITY);
23919 let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
23920 assert_eq!(r, 0);
23921 let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
23922 assert_eq!(r, 1);
23923 }
23924
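    // mask_blend_ph takes a lane from b where the corresponding mask bit is 1 and from a
    // where it is 0.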
23925 #[simd_test(enable = "avx512fp16,avx512vl")]
23926 unsafe fn test_mm_mask_blend_ph() {
23927 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23928 let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
23929 let r = _mm_mask_blend_ph(0b01010101, a, b);
23930 let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
23931 assert_eq_m128h(r, e);
23932 }
23933
23934 #[simd_test(enable = "avx512fp16,avx512vl")]
23935 unsafe fn test_mm256_mask_blend_ph() {
23936 let a = _mm256_set_ph(
23937 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23938 );
23939 let b = _mm256_set_ph(
23940 -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
23941 -14.0, -15.0, -16.0,
23942 );
23943 let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
23944 let e = _mm256_set_ph(
23945 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
23946 -16.0,
23947 );
23948 assert_eq_m256h(r, e);
23949 }
23950
23951 #[simd_test(enable = "avx512fp16")]
23952 unsafe fn test_mm512_mask_blend_ph() {
23953 let a = _mm512_set_ph(
23954 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23955 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23956 31.0, 32.0,
23957 );
23958 let b = _mm512_set_ph(
23959 -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
23960 -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
23961 -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
23962 );
23963 let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
23964 let e = _mm512_set_ph(
23965 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
23966 -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
23967 29.0, -30.0, 31.0, -32.0,
23968 );
23969 assert_eq_m512h(r, e);
23970 }
23971
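    // permutex2var_ph selects lanes from the concatenation of a and b: indices below the
    // lane count of a pick from a, higher indices pick from b.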
23972 #[simd_test(enable = "avx512fp16,avx512vl")]
23973 unsafe fn test_mm_permutex2var_ph() {
23974 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23975 let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
23976 let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
23977 let r = _mm_permutex2var_ph(a, idx, b);
23978 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
23979 assert_eq_m128h(r, e);
23980 }
23981
23982 #[simd_test(enable = "avx512fp16,avx512vl")]
23983 unsafe fn test_mm256_permutex2var_ph() {
23984 let a = _mm256_setr_ph(
23985 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23986 );
23987 let b = _mm256_setr_ph(
23988 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
23989 31.0, 32.0,
23990 );
23991 let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
23992 let r = _mm256_permutex2var_ph(a, idx, b);
23993 let e = _mm256_setr_ph(
23994 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
23995 31.0,
23996 );
23997 assert_eq_m256h(r, e);
23998 }
23999
24000 #[simd_test(enable = "avx512fp16")]
24001 unsafe fn test_mm512_permutex2var_ph() {
24002 let a = _mm512_setr_ph(
24003 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24004 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24005 31.0, 32.0,
24006 );
24007 let b = _mm512_setr_ph(
24008 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
24009 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
24010 61.0, 62.0, 63.0, 64.0,
24011 );
24012 let idx = _mm512_set_epi16(
24013 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
24014 18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
24015 );
24016 let r = _mm512_permutex2var_ph(a, idx, b);
24017 let e = _mm512_setr_ph(
24018 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24019 31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
24020 59.0, 61.0, 63.0,
24021 );
24022 assert_eq_m512h(r, e);
24023 }
24024
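    // permutexvar_ph shuffles the lanes of a according to the per-lane indices in idx.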
24025 #[simd_test(enable = "avx512fp16,avx512vl")]
24026 unsafe fn test_mm_permutexvar_ph() {
24027 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24028 let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
24029 let r = _mm_permutexvar_ph(idx, a);
24030 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
24031 assert_eq_m128h(r, e);
24032 }
24033
24034 #[simd_test(enable = "avx512fp16,avx512vl")]
24035 unsafe fn test_mm256_permutexvar_ph() {
24036 let a = _mm256_set_ph(
24037 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24038 );
24039 let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
24040 let r = _mm256_permutexvar_ph(idx, a);
24041 let e = _mm256_setr_ph(
24042 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
24043 );
24044 assert_eq_m256h(r, e);
24045 }
24046
24047 #[simd_test(enable = "avx512fp16")]
24048 unsafe fn test_mm512_permutexvar_ph() {
24049 let a = _mm512_set_ph(
24050 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24051 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24052 31.0, 32.0,
24053 );
24054 let idx = _mm512_set_epi16(
24055 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
24056 17, 19, 21, 23, 25, 27, 29, 31,
24057 );
24058 let r = _mm512_permutexvar_ph(idx, a);
24059 let e = _mm512_setr_ph(
24060 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
24061 31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
24062 30.0, 32.0,
24063 );
24064 assert_eq_m512h(r, e);
24065 }
24066
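    // The cvtep{i,u}16_ph conversions turn each packed 16-bit integer into an f16 lane;
    // masked variants take unselected lanes from src, and maskz variants zero them.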
24067 #[simd_test(enable = "avx512fp16,avx512vl")]
24068 unsafe fn test_mm_cvtepi16_ph() {
24069 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24070 let r = _mm_cvtepi16_ph(a);
24071 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24072 assert_eq_m128h(r, e);
24073 }
24074
24075 #[simd_test(enable = "avx512fp16,avx512vl")]
24076 unsafe fn test_mm_mask_cvtepi16_ph() {
24077 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24078 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24079 let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a);
24080 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24081 assert_eq_m128h(r, e);
24082 }
24083
24084 #[simd_test(enable = "avx512fp16,avx512vl")]
24085 unsafe fn test_mm_maskz_cvtepi16_ph() {
24086 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24087 let r = _mm_maskz_cvtepi16_ph(0b01010101, a);
24088 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24089 assert_eq_m128h(r, e);
24090 }
24091
24092 #[simd_test(enable = "avx512fp16,avx512vl")]
24093 unsafe fn test_mm256_cvtepi16_ph() {
24094 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24095 let r = _mm256_cvtepi16_ph(a);
24096 let e = _mm256_set_ph(
24097 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24098 );
24099 assert_eq_m256h(r, e);
24100 }
24101
24102 #[simd_test(enable = "avx512fp16,avx512vl")]
24103 unsafe fn test_mm256_mask_cvtepi16_ph() {
24104 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24105 let src = _mm256_set_ph(
24106 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24107 );
24108 let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a);
24109 let e = _mm256_set_ph(
24110 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24111 );
24112 assert_eq_m256h(r, e);
24113 }
24114
24115 #[simd_test(enable = "avx512fp16,avx512vl")]
24116 unsafe fn test_mm256_maskz_cvtepi16_ph() {
24117 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24118 let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a);
24119 let e = _mm256_set_ph(
24120 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
24121 );
24122 assert_eq_m256h(r, e);
24123 }
24124
24125 #[simd_test(enable = "avx512fp16")]
24126 unsafe fn test_mm512_cvtepi16_ph() {
24127 let a = _mm512_set_epi16(
24128 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24129 25, 26, 27, 28, 29, 30, 31, 32,
24130 );
24131 let r = _mm512_cvtepi16_ph(a);
24132 let e = _mm512_set_ph(
24133 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24134 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24135 31.0, 32.0,
24136 );
24137 assert_eq_m512h(r, e);
24138 }
24139
24140 #[simd_test(enable = "avx512fp16")]
24141 unsafe fn test_mm512_mask_cvtepi16_ph() {
24142 let a = _mm512_set_epi16(
24143 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24144 25, 26, 27, 28, 29, 30, 31, 32,
24145 );
24146 let src = _mm512_set_ph(
24147 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24148 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24149 );
24150 let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a);
24151 let e = _mm512_set_ph(
24152 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24153 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24154 );
24155 assert_eq_m512h(r, e);
24156 }
24157
24158 #[simd_test(enable = "avx512fp16")]
24159 unsafe fn test_mm512_maskz_cvtepi16_ph() {
24160 let a = _mm512_set_epi16(
24161 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24162 25, 26, 27, 28, 29, 30, 31, 32,
24163 );
24164 let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a);
24165 let e = _mm512_set_ph(
24166 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24167 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24168 );
24169 assert_eq_m512h(r, e);
24170 }
24171
24172 #[simd_test(enable = "avx512fp16")]
24173 unsafe fn test_mm512_cvt_roundepi16_ph() {
24174 let a = _mm512_set_epi16(
24175 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24176 25, 26, 27, 28, 29, 30, 31, 32,
24177 );
24178 let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24179 let e = _mm512_set_ph(
24180 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24181 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24182 31.0, 32.0,
24183 );
24184 assert_eq_m512h(r, e);
24185 }
24186
24187 #[simd_test(enable = "avx512fp16")]
24188 unsafe fn test_mm512_mask_cvt_roundepi16_ph() {
24189 let a = _mm512_set_epi16(
24190 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24191 25, 26, 27, 28, 29, 30, 31, 32,
24192 );
24193 let src = _mm512_set_ph(
24194 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24195 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24196 );
24197 let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24198 src,
24199 0b01010101010101010101010101010101,
24200 a,
24201 );
24202 let e = _mm512_set_ph(
24203 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24204 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24205 );
24206 assert_eq_m512h(r, e);
24207 }
24208
24209 #[simd_test(enable = "avx512fp16")]
24210 unsafe fn test_mm512_maskz_cvt_roundepi16_ph() {
24211 let a = _mm512_set_epi16(
24212 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24213 25, 26, 27, 28, 29, 30, 31, 32,
24214 );
24215 let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24216 0b01010101010101010101010101010101,
24217 a,
24218 );
24219 let e = _mm512_set_ph(
24220 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24221 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24222 );
24223 assert_eq_m512h(r, e);
24224 }
24225
24226 #[simd_test(enable = "avx512fp16,avx512vl")]
24227 unsafe fn test_mm_cvtepu16_ph() {
24228 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24229 let r = _mm_cvtepu16_ph(a);
24230 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24231 assert_eq_m128h(r, e);
24232 }
24233
24234 #[simd_test(enable = "avx512fp16,avx512vl")]
24235 unsafe fn test_mm_mask_cvtepu16_ph() {
24236 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24237 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24238 let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a);
24239 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24240 assert_eq_m128h(r, e);
24241 }
24242
24243 #[simd_test(enable = "avx512fp16,avx512vl")]
24244 unsafe fn test_mm_maskz_cvtepu16_ph() {
24245 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24246 let r = _mm_maskz_cvtepu16_ph(0b01010101, a);
24247 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24248 assert_eq_m128h(r, e);
24249 }
24250
24251 #[simd_test(enable = "avx512fp16,avx512vl")]
24252 unsafe fn test_mm256_cvtepu16_ph() {
24253 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24254 let r = _mm256_cvtepu16_ph(a);
24255 let e = _mm256_set_ph(
24256 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24257 );
24258 assert_eq_m256h(r, e);
24259 }
24260
24261 #[simd_test(enable = "avx512fp16,avx512vl")]
24262 unsafe fn test_mm256_mask_cvtepu16_ph() {
24263 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24264 let src = _mm256_set_ph(
24265 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24266 );
24267 let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a);
24268 let e = _mm256_set_ph(
24269 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24270 );
24271 assert_eq_m256h(r, e);
24272 }
24273
24274 #[simd_test(enable = "avx512fp16,avx512vl")]
24275 unsafe fn test_mm256_maskz_cvtepu16_ph() {
24276 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24277 let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a);
24278 let e = _mm256_set_ph(
24279 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
24280 );
24281 assert_eq_m256h(r, e);
24282 }
24283
24284 #[simd_test(enable = "avx512fp16")]
24285 unsafe fn test_mm512_cvtepu16_ph() {
24286 let a = _mm512_set_epi16(
24287 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24288 25, 26, 27, 28, 29, 30, 31, 32,
24289 );
24290 let r = _mm512_cvtepu16_ph(a);
24291 let e = _mm512_set_ph(
24292 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24293 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24294 31.0, 32.0,
24295 );
24296 assert_eq_m512h(r, e);
24297 }
24298
24299 #[simd_test(enable = "avx512fp16")]
24300 unsafe fn test_mm512_mask_cvtepu16_ph() {
24301 let a = _mm512_set_epi16(
24302 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24303 25, 26, 27, 28, 29, 30, 31, 32,
24304 );
24305 let src = _mm512_set_ph(
24306 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24307 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24308 );
24309 let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a);
24310 let e = _mm512_set_ph(
24311 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24312 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24313 );
24314 assert_eq_m512h(r, e);
24315 }
24316
24317 #[simd_test(enable = "avx512fp16")]
24318 unsafe fn test_mm512_maskz_cvtepu16_ph() {
24319 let a = _mm512_set_epi16(
24320 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24321 25, 26, 27, 28, 29, 30, 31, 32,
24322 );
24323 let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a);
24324 let e = _mm512_set_ph(
24325 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24326 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24327 );
24328 assert_eq_m512h(r, e);
24329 }
24330
24331 #[simd_test(enable = "avx512fp16")]
24332 unsafe fn test_mm512_cvt_roundepu16_ph() {
24333 let a = _mm512_set_epi16(
24334 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24335 25, 26, 27, 28, 29, 30, 31, 32,
24336 );
24337 let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24338 let e = _mm512_set_ph(
24339 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24340 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
24341 31.0, 32.0,
24342 );
24343 assert_eq_m512h(r, e);
24344 }
24345
24346 #[simd_test(enable = "avx512fp16")]
24347 unsafe fn test_mm512_mask_cvt_roundepu16_ph() {
24348 let a = _mm512_set_epi16(
24349 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24350 25, 26, 27, 28, 29, 30, 31, 32,
24351 );
24352 let src = _mm512_set_ph(
24353 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
24354 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
24355 );
24356 let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24357 src,
24358 0b01010101010101010101010101010101,
24359 a,
24360 );
24361 let e = _mm512_set_ph(
24362 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
24363 28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
24364 );
24365 assert_eq_m512h(r, e);
24366 }
24367
24368 #[simd_test(enable = "avx512fp16")]
24369 unsafe fn test_mm512_maskz_cvt_roundepu16_ph() {
24370 let a = _mm512_set_epi16(
24371 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
24372 25, 26, 27, 28, 29, 30, 31, 32,
24373 );
24374 let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24375 0b01010101010101010101010101010101,
24376 a,
24377 );
24378 let e = _mm512_set_ph(
24379 0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
24380 0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
24381 );
24382 assert_eq_m512h(r, e);
24383 }
24384
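    // 32-bit integer sources produce fewer f16 results than a __m128h holds, so the 128-bit
    // variant zeroes its upper four lanes; the wider sources fill correspondingly narrower
    // half-precision vectors.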
24385 #[simd_test(enable = "avx512fp16,avx512vl")]
24386 unsafe fn test_mm_cvtepi32_ph() {
24387 let a = _mm_set_epi32(1, 2, 3, 4);
24388 let r = _mm_cvtepi32_ph(a);
24389 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24390 assert_eq_m128h(r, e);
24391 }
24392
24393 #[simd_test(enable = "avx512fp16,avx512vl")]
24394 unsafe fn test_mm_mask_cvtepi32_ph() {
24395 let a = _mm_set_epi32(1, 2, 3, 4);
24396 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24397 let r = _mm_mask_cvtepi32_ph(src, 0b0101, a);
24398 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
24399 assert_eq_m128h(r, e);
24400 }
24401
24402 #[simd_test(enable = "avx512fp16,avx512vl")]
24403 unsafe fn test_mm_maskz_cvtepi32_ph() {
24404 let a = _mm_set_epi32(1, 2, 3, 4);
24405 let r = _mm_maskz_cvtepi32_ph(0b0101, a);
24406 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
24407 assert_eq_m128h(r, e);
24408 }
24409
24410 #[simd_test(enable = "avx512fp16,avx512vl")]
24411 unsafe fn test_mm256_cvtepi32_ph() {
24412 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24413 let r = _mm256_cvtepi32_ph(a);
24414 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24415 assert_eq_m128h(r, e);
24416 }
24417
24418 #[simd_test(enable = "avx512fp16,avx512vl")]
24419 unsafe fn test_mm256_mask_cvtepi32_ph() {
24420 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24421 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24422 let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a);
24423 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24424 assert_eq_m128h(r, e);
24425 }
24426
24427 #[simd_test(enable = "avx512fp16,avx512vl")]
24428 unsafe fn test_mm256_maskz_cvtepi32_ph() {
24429 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24430 let r = _mm256_maskz_cvtepi32_ph(0b01010101, a);
24431 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24432 assert_eq_m128h(r, e);
24433 }
24434
24435 #[simd_test(enable = "avx512fp16")]
24436 unsafe fn test_mm512_cvtepi32_ph() {
24437 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24438 let r = _mm512_cvtepi32_ph(a);
24439 let e = _mm256_set_ph(
24440 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24441 );
24442 assert_eq_m256h(r, e);
24443 }
24444
24445 #[simd_test(enable = "avx512fp16")]
24446 unsafe fn test_mm512_mask_cvtepi32_ph() {
24447 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24448 let src = _mm256_set_ph(
24449 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24450 );
24451 let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a);
24452 let e = _mm256_set_ph(
24453 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24454 );
24455 assert_eq_m256h(r, e);
24456 }
24457
24458 #[simd_test(enable = "avx512fp16")]
24459 unsafe fn test_mm512_maskz_cvtepi32_ph() {
24460 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24461 let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
24462 let e = _mm256_set_ph(
24463 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24464 );
24465 assert_eq_m256h(r, e);
24466 }
24467
24468 #[simd_test(enable = "avx512fp16")]
24469 unsafe fn test_mm512_cvt_roundepi32_ph() {
24470 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24471 let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24472 let e = _mm256_set_ph(
24473 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24474 );
24475 assert_eq_m256h(r, e);
24476 }
24477
24478 #[simd_test(enable = "avx512fp16")]
24479 unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
24480 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24481 let src = _mm256_set_ph(
24482 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24483 );
24484 let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24485 src,
24486 0b0101010101010101,
24487 a,
24488 );
24489 let e = _mm256_set_ph(
24490 10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24491 );
24492 assert_eq_m256h(r, e);
24493 }
24494
24495 #[simd_test(enable = "avx512fp16")]
24496 unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
24497 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24498 let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24499 0b0101010101010101,
24500 a,
24501 );
24502 let e = _mm256_set_ph(
24503 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24504 );
24505 assert_eq_m256h(r, e);
24506 }
24507
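    // cvti32_sh converts the integer argument into the lowest f16 lane and copies the
    // remaining lanes from a.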
24508 #[simd_test(enable = "avx512fp16")]
24509 unsafe fn test_mm_cvti32_sh() {
24510 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24511 let r = _mm_cvti32_sh(a, 10);
24512 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24513 assert_eq_m128h(r, e);
24514 }
24515
24516 #[simd_test(enable = "avx512fp16")]
24517 unsafe fn test_mm_cvt_roundi32_sh() {
24518 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24519 let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
24520 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24521 assert_eq_m128h(r, e);
24522 }
24523
24524 #[simd_test(enable = "avx512fp16,avx512vl")]
24525 unsafe fn test_mm_cvtepu32_ph() {
24526 let a = _mm_set_epi32(1, 2, 3, 4);
24527 let r = _mm_cvtepu32_ph(a);
24528 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24529 assert_eq_m128h(r, e);
24530 }
24531
24532 #[simd_test(enable = "avx512fp16,avx512vl")]
24533 unsafe fn test_mm_mask_cvtepu32_ph() {
24534 let a = _mm_set_epi32(1, 2, 3, 4);
24535 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24536 let r = _mm_mask_cvtepu32_ph(src, 0b0101, a);
24537 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
24538 assert_eq_m128h(r, e);
24539 }
24540
24541 #[simd_test(enable = "avx512fp16,avx512vl")]
24542 unsafe fn test_mm_maskz_cvtepu32_ph() {
24543 let a = _mm_set_epi32(1, 2, 3, 4);
24544 let r = _mm_maskz_cvtepu32_ph(0b0101, a);
24545 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
24546 assert_eq_m128h(r, e);
24547 }
24548
24549 #[simd_test(enable = "avx512fp16,avx512vl")]
24550 unsafe fn test_mm256_cvtepu32_ph() {
24551 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24552 let r = _mm256_cvtepu32_ph(a);
24553 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24554 assert_eq_m128h(r, e);
24555 }
24556
24557 #[simd_test(enable = "avx512fp16,avx512vl")]
24558 unsafe fn test_mm256_mask_cvtepu32_ph() {
24559 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24560 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24561 let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a);
24562 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24563 assert_eq_m128h(r, e);
24564 }
24565
24566 #[simd_test(enable = "avx512fp16,avx512vl")]
24567 unsafe fn test_mm256_maskz_cvtepu32_ph() {
24568 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
24569 let r = _mm256_maskz_cvtepu32_ph(0b01010101, a);
24570 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24571 assert_eq_m128h(r, e);
24572 }
24573
24574 #[simd_test(enable = "avx512fp16")]
24575 unsafe fn test_mm512_cvtepu32_ph() {
24576 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24577 let r = _mm512_cvtepu32_ph(a);
24578 let e = _mm256_set_ph(
24579 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24580 );
24581 assert_eq_m256h(r, e);
24582 }
24583
24584 #[simd_test(enable = "avx512fp16")]
24585 unsafe fn test_mm512_mask_cvtepu32_ph() {
24586 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24587 let src = _mm256_set_ph(
24588 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24589 );
24590 let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a);
24591 let e = _mm256_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
24593 );
24594 assert_eq_m256h(r, e);
24595 }
24596
24597 #[simd_test(enable = "avx512fp16")]
24598 unsafe fn test_mm512_maskz_cvtepu32_ph() {
24599 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24600 let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
24601 let e = _mm256_set_ph(
24602 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24603 );
24604 assert_eq_m256h(r, e);
24605 }
24606
24607 #[simd_test(enable = "avx512fp16")]
24608 unsafe fn test_mm512_cvt_roundepu32_ph() {
24609 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24610 let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24611 let e = _mm256_set_ph(
24612 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24613 );
24614 assert_eq_m256h(r, e);
24615 }
24616
24617 #[simd_test(enable = "avx512fp16")]
24618 unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
24619 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24620 let src = _mm256_set_ph(
24621 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24622 );
24623 let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24624 src,
24625 0b0101010101010101,
24626 a,
24627 );
24628 let e = _mm256_set_ph(
24629 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
24630 16.0,
24631 );
24632 assert_eq_m256h(r, e);
24633 }
24634
24635 #[simd_test(enable = "avx512fp16")]
24636 unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
24637 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
24638 let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24639 0b0101010101010101,
24640 a,
24641 );
24642 let e = _mm256_set_ph(
24643 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24644 );
24645 assert_eq_m256h(r, e);
24646 }
24647
24648 #[simd_test(enable = "avx512fp16")]
24649 unsafe fn test_mm_cvtu32_sh() {
24650 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24651 let r = _mm_cvtu32_sh(a, 10);
24652 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24653 assert_eq_m128h(r, e);
24654 }
24655
24656 #[simd_test(enable = "avx512fp16")]
24657 unsafe fn test_mm_cvt_roundu32_sh() {
24658 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24659 let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
24660 let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24661 assert_eq_m128h(r, e);
24662 }
24663
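    // 64-bit integer sources yield at most eight f16 results: the 512-bit variant fills a
    // full __m128h, while the 128- and 256-bit variants zero the unused upper lanes.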
24664 #[simd_test(enable = "avx512fp16,avx512vl")]
24665 unsafe fn test_mm_cvtepi64_ph() {
24666 let a = _mm_set_epi64x(1, 2);
24667 let r = _mm_cvtepi64_ph(a);
24668 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
24669 assert_eq_m128h(r, e);
24670 }
24671
24672 #[simd_test(enable = "avx512fp16,avx512vl")]
24673 unsafe fn test_mm_mask_cvtepi64_ph() {
24674 let a = _mm_set_epi64x(1, 2);
24675 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24676 let r = _mm_mask_cvtepi64_ph(src, 0b01, a);
24677 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
24678 assert_eq_m128h(r, e);
24679 }
24680
24681 #[simd_test(enable = "avx512fp16,avx512vl")]
24682 unsafe fn test_mm_maskz_cvtepi64_ph() {
24683 let a = _mm_set_epi64x(1, 2);
24684 let r = _mm_maskz_cvtepi64_ph(0b01, a);
24685 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.);
24686 assert_eq_m128h(r, e);
24687 }
24688
24689 #[simd_test(enable = "avx512fp16,avx512vl")]
24690 unsafe fn test_mm256_cvtepi64_ph() {
24691 let a = _mm256_set_epi64x(1, 2, 3, 4);
24692 let r = _mm256_cvtepi64_ph(a);
24693 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24694 assert_eq_m128h(r, e);
24695 }
24696
24697 #[simd_test(enable = "avx512fp16,avx512vl")]
24698 unsafe fn test_mm256_mask_cvtepi64_ph() {
24699 let a = _mm256_set_epi64x(1, 2, 3, 4);
24700 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24701 let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a);
24702 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
24703 assert_eq_m128h(r, e);
24704 }
24705
24706 #[simd_test(enable = "avx512fp16,avx512vl")]
24707 unsafe fn test_mm256_maskz_cvtepi64_ph() {
24708 let a = _mm256_set_epi64x(1, 2, 3, 4);
24709 let r = _mm256_maskz_cvtepi64_ph(0b0101, a);
24710 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
24711 assert_eq_m128h(r, e);
24712 }
24713
24714 #[simd_test(enable = "avx512fp16")]
24715 unsafe fn test_mm512_cvtepi64_ph() {
24716 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24717 let r = _mm512_cvtepi64_ph(a);
24718 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24719 assert_eq_m128h(r, e);
24720 }
24721
24722 #[simd_test(enable = "avx512fp16")]
24723 unsafe fn test_mm512_mask_cvtepi64_ph() {
24724 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24725 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24726 let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a);
24727 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24728 assert_eq_m128h(r, e);
24729 }
24730
24731 #[simd_test(enable = "avx512fp16")]
24732 unsafe fn test_mm512_maskz_cvtepi64_ph() {
24733 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24734 let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
24735 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24736 assert_eq_m128h(r, e);
24737 }
24738
24739 #[simd_test(enable = "avx512fp16")]
24740 unsafe fn test_mm512_cvt_roundepi64_ph() {
24741 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24742 let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24743 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24744 assert_eq_m128h(r, e);
24745 }
24746
24747 #[simd_test(enable = "avx512fp16")]
24748 unsafe fn test_mm512_mask_cvt_roundepi64_ph() {
24749 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24750 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24751 let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24752 src, 0b01010101, a,
24753 );
24754 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24755 assert_eq_m128h(r, e);
24756 }
24757
24758 #[simd_test(enable = "avx512fp16")]
24759 unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
24760 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24761 let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24762 0b01010101, a,
24763 );
24764 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24765 assert_eq_m128h(r, e);
24766 }
24767
24768 #[simd_test(enable = "avx512fp16,avx512vl")]
24769 unsafe fn test_mm_cvtepu64_ph() {
24770 let a = _mm_set_epi64x(1, 2);
24771 let r = _mm_cvtepu64_ph(a);
24772 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
24773 assert_eq_m128h(r, e);
24774 }
24775
24776 #[simd_test(enable = "avx512fp16,avx512vl")]
24777 unsafe fn test_mm_mask_cvtepu64_ph() {
24778 let a = _mm_set_epi64x(1, 2);
24779 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24780 let r = _mm_mask_cvtepu64_ph(src, 0b01, a);
24781 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
24782 assert_eq_m128h(r, e);
24783 }
24784
24785 #[simd_test(enable = "avx512fp16,avx512vl")]
24786 unsafe fn test_mm_maskz_cvtepu64_ph() {
24787 let a = _mm_set_epi64x(1, 2);
24788 let r = _mm_maskz_cvtepu64_ph(0b01, a);
24789 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
24790 assert_eq_m128h(r, e);
24791 }
24792
24793 #[simd_test(enable = "avx512fp16,avx512vl")]
24794 unsafe fn test_mm256_cvtepu64_ph() {
24795 let a = _mm256_set_epi64x(1, 2, 3, 4);
24796 let r = _mm256_cvtepu64_ph(a);
24797 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24798 assert_eq_m128h(r, e);
24799 }
24800
24801 #[simd_test(enable = "avx512fp16,avx512vl")]
24802 unsafe fn test_mm256_mask_cvtepu64_ph() {
24803 let a = _mm256_set_epi64x(1, 2, 3, 4);
24804 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24805 let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a);
24806 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
24807 assert_eq_m128h(r, e);
24808 }
24809
24810 #[simd_test(enable = "avx512fp16,avx512vl")]
24811 unsafe fn test_mm256_maskz_cvtepu64_ph() {
24812 let a = _mm256_set_epi64x(1, 2, 3, 4);
24813 let r = _mm256_maskz_cvtepu64_ph(0b0101, a);
24814 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
24815 assert_eq_m128h(r, e);
24816 }
24817
24818 #[simd_test(enable = "avx512fp16")]
24819 unsafe fn test_mm512_cvtepu64_ph() {
24820 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24821 let r = _mm512_cvtepu64_ph(a);
24822 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24823 assert_eq_m128h(r, e);
24824 }
24825
24826 #[simd_test(enable = "avx512fp16")]
24827 unsafe fn test_mm512_mask_cvtepu64_ph() {
24828 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24829 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24830 let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a);
24831 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24832 assert_eq_m128h(r, e);
24833 }
24834
24835 #[simd_test(enable = "avx512fp16")]
24836 unsafe fn test_mm512_maskz_cvtepu64_ph() {
24837 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24838 let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
24839 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24840 assert_eq_m128h(r, e);
24841 }
24842
24843 #[simd_test(enable = "avx512fp16")]
24844 unsafe fn test_mm512_cvt_roundepu64_ph() {
24845 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24846 let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24847 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24848 assert_eq_m128h(r, e);
24849 }
24850
24851 #[simd_test(enable = "avx512fp16")]
24852 unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
24853 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24854 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24855 let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24856 src, 0b01010101, a,
24857 );
24858 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24859 assert_eq_m128h(r, e);
24860 }
24861
24862 #[simd_test(enable = "avx512fp16")]
24863 unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
24864 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
24865 let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24866 0b01010101, a,
24867 );
24868 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
24869 assert_eq_m128h(r, e);
24870 }
24871
24872 #[simd_test(enable = "avx512fp16,avx512vl")]
24873 unsafe fn test_mm_cvtxps_ph() {
24874 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
24875 let r = _mm_cvtxps_ph(a);
24876 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
24877 assert_eq_m128h(r, e);
24878 }
24879
24880 #[simd_test(enable = "avx512fp16,avx512vl")]
24881 unsafe fn test_mm_mask_cvtxps_ph() {
24882 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
24883 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24884 let r = _mm_mask_cvtxps_ph(src, 0b0101, a);
24885 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0);
24886 assert_eq_m128h(r, e);
24887 }
24888
24889 #[simd_test(enable = "avx512fp16,avx512vl")]
24890 unsafe fn test_mm_maskz_cvtxps_ph() {
24891 let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
24892 let r = _mm_maskz_cvtxps_ph(0b0101, a);
24893 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
24894 assert_eq_m128h(r, e);
24895 }
24896
24897 #[simd_test(enable = "avx512fp16,avx512vl")]
24898 unsafe fn test_mm256_cvtxps_ph() {
24899 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24900 let r = _mm256_cvtxps_ph(a);
24901 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24902 assert_eq_m128h(r, e);
24903 }
24904
24905 #[simd_test(enable = "avx512fp16,avx512vl")]
24906 unsafe fn test_mm256_mask_cvtxps_ph() {
24907 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24908 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24909 let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a);
24910 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24911 assert_eq_m128h(r, e);
24912 }
24913
24914 #[simd_test(enable = "avx512fp16,avx512vl")]
24915 unsafe fn test_mm256_maskz_cvtxps_ph() {
24916 let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24917 let r = _mm256_maskz_cvtxps_ph(0b01010101, a);
24918 let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
24919 assert_eq_m128h(r, e);
24920 }
24921
24922 #[simd_test(enable = "avx512fp16")]
24923 unsafe fn test_mm512_cvtxps_ph() {
24924 let a = _mm512_set_ps(
24925 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24926 );
24927 let r = _mm512_cvtxps_ph(a);
24928 let e = _mm256_set_ph(
24929 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24930 );
24931 assert_eq_m256h(r, e);
24932 }
24933
24934 #[simd_test(enable = "avx512fp16")]
24935 unsafe fn test_mm512_mask_cvtxps_ph() {
24936 let a = _mm512_set_ps(
24937 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24938 );
24939 let src = _mm256_set_ph(
24940 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24941 );
24942 let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a);
24943 let e = _mm256_set_ph(
24944 10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
24945 );
24946 assert_eq_m256h(r, e);
24947 }
24948
24949 #[simd_test(enable = "avx512fp16")]
24950 unsafe fn test_mm512_maskz_cvtxps_ph() {
24951 let a = _mm512_set_ps(
24952 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24953 );
24954 let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a);
24955 let e = _mm256_set_ph(
24956 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
24957 );
24958 assert_eq_m256h(r, e);
24959 }
24960
24961 #[simd_test(enable = "avx512fp16")]
24962 unsafe fn test_mm512_cvtx_roundps_ph() {
24963 let a = _mm512_set_ps(
24964 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24965 );
24966 let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
24967 let e = _mm256_set_ph(
24968 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24969 );
24970 assert_eq_m256h(r, e);
24971 }
24972
24973 #[simd_test(enable = "avx512fp16")]
24974 unsafe fn test_mm512_mask_cvtx_roundps_ph() {
24975 let a = _mm512_set_ps(
24976 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24977 );
24978 let src = _mm256_set_ph(
24979 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
24980 );
24981 let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24982 src,
24983 0b0101010101010101,
24984 a,
24985 );
24986 let e = _mm256_set_ph(
24987 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
24988 16.0,
24989 );
24990 assert_eq_m256h(r, e);
24991 }
24992
24993 #[simd_test(enable = "avx512fp16")]
24994 unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
24995 let a = _mm512_set_ps(
24996 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
24997 );
24998 let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
24999 0b0101010101010101,
25000 a,
25001 );
25002 let e = _mm256_set_ph(
25003 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
25004 );
25005 assert_eq_m256h(r, e);
25006 }
25007
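// The scalar `_sh` conversions write the converted low element of `b` to lane 0 and
// copy lanes 1..=7 from `a`; under a mask, a clear bit makes lane 0 come from `src`
// (or zero for the `maskz` forms) instead.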
25008 #[simd_test(enable = "avx512fp16")]
25009 unsafe fn test_mm_cvtss_sh() {
25010 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25011 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25012 let r = _mm_cvtss_sh(a, b);
25013 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25014 assert_eq_m128h(r, e);
25015 }
25016
25017 #[simd_test(enable = "avx512fp16")]
25018 unsafe fn test_mm_mask_cvtss_sh() {
25019 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25020 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25021 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25022 let r = _mm_mask_cvtss_sh(src, 0, a, b);
25023 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25024 assert_eq_m128h(r, e);
25025 let r = _mm_mask_cvtss_sh(src, 1, a, b);
25026 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25027 assert_eq_m128h(r, e);
25028 }
25029
25030 #[simd_test(enable = "avx512fp16")]
25031 unsafe fn test_mm_maskz_cvtss_sh() {
25032 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25033 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25034 let r = _mm_maskz_cvtss_sh(0, a, b);
25035 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25036 assert_eq_m128h(r, e);
25037 let r = _mm_maskz_cvtss_sh(1, a, b);
25038 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25039 assert_eq_m128h(r, e);
25040 }
25041
25042 #[simd_test(enable = "avx512fp16")]
25043 unsafe fn test_mm_cvt_roundss_sh() {
25044 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25045 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25046 let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
25047 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25048 assert_eq_m128h(r, e);
25049 }
25050
25051 #[simd_test(enable = "avx512fp16")]
25052 unsafe fn test_mm_mask_cvt_roundss_sh() {
25053 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25054 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25055 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25056 let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25057 src, 0, a, b,
25058 );
25059 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25060 assert_eq_m128h(r, e);
25061 let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25062 src, 1, a, b,
25063 );
25064 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25065 assert_eq_m128h(r, e);
25066 }
25067
25068 #[simd_test(enable = "avx512fp16")]
25069 unsafe fn test_mm_maskz_cvt_roundss_sh() {
25070 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25071 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
25072 let r =
25073 _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
25074 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25075 assert_eq_m128h(r, e);
25076 let r =
25077 _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
25078 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25079 assert_eq_m128h(r, e);
25080 }
25081
25082 #[simd_test(enable = "avx512fp16,avx512vl")]
25083 unsafe fn test_mm_cvtpd_ph() {
25084 let a = _mm_set_pd(1.0, 2.0);
25085 let r = _mm_cvtpd_ph(a);
25086 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
25087 assert_eq_m128h(r, e);
25088 }
25089
25090 #[simd_test(enable = "avx512fp16,avx512vl")]
25091 unsafe fn test_mm_mask_cvtpd_ph() {
25092 let a = _mm_set_pd(1.0, 2.0);
25093 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25094 let r = _mm_mask_cvtpd_ph(src, 0b01, a);
25095 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
25096 assert_eq_m128h(r, e);
25097 }
25098
25099 #[simd_test(enable = "avx512fp16,avx512vl")]
25100 unsafe fn test_mm_maskz_cvtpd_ph() {
25101 let a = _mm_set_pd(1.0, 2.0);
25102 let r = _mm_maskz_cvtpd_ph(0b01, a);
25103 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
25104 assert_eq_m128h(r, e);
25105 }
25106
25107 #[simd_test(enable = "avx512fp16,avx512vl")]
25108 unsafe fn test_mm256_cvtpd_ph() {
25109 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25110 let r = _mm256_cvtpd_ph(a);
25111 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25112 assert_eq_m128h(r, e);
25113 }
25114
25115 #[simd_test(enable = "avx512fp16,avx512vl")]
25116 unsafe fn test_mm256_mask_cvtpd_ph() {
25117 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25118 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25119 let r = _mm256_mask_cvtpd_ph(src, 0b0101, a);
25120 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
25121 assert_eq_m128h(r, e);
25122 }
25123
25124 #[simd_test(enable = "avx512fp16,avx512vl")]
25125 unsafe fn test_mm256_maskz_cvtpd_ph() {
25126 let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
25127 let r = _mm256_maskz_cvtpd_ph(0b0101, a);
25128 let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
25129 assert_eq_m128h(r, e);
25130 }
25131
25132 #[simd_test(enable = "avx512fp16")]
25133 unsafe fn test_mm512_cvtpd_ph() {
25134 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25135 let r = _mm512_cvtpd_ph(a);
25136 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25137 assert_eq_m128h(r, e);
25138 }
25139
25140 #[simd_test(enable = "avx512fp16")]
25141 unsafe fn test_mm512_mask_cvtpd_ph() {
25142 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25143 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25144 let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a);
25145 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25146 assert_eq_m128h(r, e);
25147 }
25148
25149 #[simd_test(enable = "avx512fp16")]
25150 unsafe fn test_mm512_maskz_cvtpd_ph() {
25151 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25152 let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
25153 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25154 assert_eq_m128h(r, e);
25155 }
25156
25157 #[simd_test(enable = "avx512fp16")]
25158 unsafe fn test_mm512_cvt_roundpd_ph() {
25159 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25160 let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25161 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25162 assert_eq_m128h(r, e);
25163 }
25164
25165 #[simd_test(enable = "avx512fp16")]
25166 unsafe fn test_mm512_mask_cvt_roundpd_ph() {
25167 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25168 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25169 let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25170 src, 0b01010101, a,
25171 );
25172 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
25173 assert_eq_m128h(r, e);
25174 }
25175
25176 #[simd_test(enable = "avx512fp16")]
25177 unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
25178 let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25179 let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25180 0b01010101, a,
25181 );
25182 let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
25183 assert_eq_m128h(r, e);
25184 }
25185
25186 #[simd_test(enable = "avx512fp16")]
25187 unsafe fn test_mm_cvtsd_sh() {
25188 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25189 let b = _mm_setr_pd(1.0, 2.0);
25190 let r = _mm_cvtsd_sh(a, b);
25191 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25192 assert_eq_m128h(r, e);
25193 }
25194
25195 #[simd_test(enable = "avx512fp16")]
25196 unsafe fn test_mm_mask_cvtsd_sh() {
25197 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25198 let b = _mm_setr_pd(1.0, 2.0);
25199 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25200 let r = _mm_mask_cvtsd_sh(src, 0, a, b);
25201 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25202 assert_eq_m128h(r, e);
25203 let r = _mm_mask_cvtsd_sh(src, 1, a, b);
25204 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25205 assert_eq_m128h(r, e);
25206 }
25207
25208 #[simd_test(enable = "avx512fp16")]
25209 unsafe fn test_mm_maskz_cvtsd_sh() {
25210 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25211 let b = _mm_setr_pd(1.0, 2.0);
25212 let r = _mm_maskz_cvtsd_sh(0, a, b);
25213 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25214 assert_eq_m128h(r, e);
25215 let r = _mm_maskz_cvtsd_sh(1, a, b);
25216 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25217 assert_eq_m128h(r, e);
25218 }
25219
25220 #[simd_test(enable = "avx512fp16")]
25221 unsafe fn test_mm_cvt_roundsd_sh() {
25222 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25223 let b = _mm_setr_pd(1.0, 2.0);
25224 let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
25225 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25226 assert_eq_m128h(r, e);
25227 }
25228
25229 #[simd_test(enable = "avx512fp16")]
25230 unsafe fn test_mm_mask_cvt_roundsd_sh() {
25231 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25232 let b = _mm_setr_pd(1.0, 2.0);
25233 let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
25234 let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25235 src, 0, a, b,
25236 );
25237 let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
25238 assert_eq_m128h(r, e);
25239 let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25240 src, 1, a, b,
25241 );
25242 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25243 assert_eq_m128h(r, e);
25244 }
25245
25246 #[simd_test(enable = "avx512fp16")]
25247 unsafe fn test_mm_maskz_cvt_roundsd_sh() {
25248 let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
25249 let b = _mm_setr_pd(1.0, 2.0);
25250 let r =
25251 _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
25252 let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
25253 assert_eq_m128h(r, e);
25254 let r =
25255 _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
25256 let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
25257 assert_eq_m128h(r, e);
25258 }
25259
25260 #[simd_test(enable = "avx512fp16,avx512vl")]
25261 unsafe fn test_mm_cvtph_epi16() {
25262 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25263 let r = _mm_cvtph_epi16(a);
25264 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25265 assert_eq_m128i(r, e);
25266 }
25267
25268 #[simd_test(enable = "avx512fp16,avx512vl")]
25269 unsafe fn test_mm_mask_cvtph_epi16() {
25270 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25271 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25272 let r = _mm_mask_cvtph_epi16(src, 0b01010101, a);
25273 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25274 assert_eq_m128i(r, e);
25275 }
25276
25277 #[simd_test(enable = "avx512fp16,avx512vl")]
25278 unsafe fn test_mm_maskz_cvtph_epi16() {
25279 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25280 let r = _mm_maskz_cvtph_epi16(0b01010101, a);
25281 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25282 assert_eq_m128i(r, e);
25283 }
25284
25285 #[simd_test(enable = "avx512fp16,avx512vl")]
25286 unsafe fn test_mm256_cvtph_epi16() {
25287 let a = _mm256_set_ph(
25288 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25289 );
25290 let r = _mm256_cvtph_epi16(a);
25291 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25292 assert_eq_m256i(r, e);
25293 }
25294
25295 #[simd_test(enable = "avx512fp16,avx512vl")]
25296 unsafe fn test_mm256_mask_cvtph_epi16() {
25297 let a = _mm256_set_ph(
25298 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25299 );
25300 let src = _mm256_set_epi16(
25301 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25302 );
25303 let r = _mm256_mask_cvtph_epi16(src, 0b0101010101010101, a);
25304 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25305 assert_eq_m256i(r, e);
25306 }
25307
25308 #[simd_test(enable = "avx512fp16,avx512vl")]
25309 unsafe fn test_mm256_maskz_cvtph_epi16() {
25310 let a = _mm256_set_ph(
25311 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25312 );
25313 let r = _mm256_maskz_cvtph_epi16(0b0101010101010101, a);
25314 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25315 assert_eq_m256i(r, e);
25316 }
25317
25318 #[simd_test(enable = "avx512fp16")]
25319 unsafe fn test_mm512_cvtph_epi16() {
25320 let a = _mm512_set_ph(
25321 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25322 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25323 31.0, 32.0,
25324 );
25325 let r = _mm512_cvtph_epi16(a);
25326 let e = _mm512_set_epi16(
25327 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25328 25, 26, 27, 28, 29, 30, 31, 32,
25329 );
25330 assert_eq_m512i(r, e);
25331 }
25332
25333 #[simd_test(enable = "avx512fp16")]
25334 unsafe fn test_mm512_mask_cvtph_epi16() {
25335 let a = _mm512_set_ph(
25336 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25337 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25338 31.0, 32.0,
25339 );
25340 let src = _mm512_set_epi16(
25341 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25342 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25343 );
25344 let r = _mm512_mask_cvtph_epi16(src, 0b01010101010101010101010101010101, a);
25345 let e = _mm512_set_epi16(
25346 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25347 24, 34, 26, 36, 28, 38, 30, 40, 32,
25348 );
25349 assert_eq_m512i(r, e);
25350 }
25351
25352 #[simd_test(enable = "avx512fp16")]
25353 unsafe fn test_mm512_maskz_cvtph_epi16() {
25354 let a = _mm512_set_ph(
25355 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25356 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25357 31.0, 32.0,
25358 );
25359 let r = _mm512_maskz_cvtph_epi16(0b01010101010101010101010101010101, a);
25360 let e = _mm512_set_epi16(
25361 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25362 0, 28, 0, 30, 0, 32,
25363 );
25364 assert_eq_m512i(r, e);
25365 }
25366
25367 #[simd_test(enable = "avx512fp16")]
25368 unsafe fn test_mm512_cvt_roundph_epi16() {
25369 let a = _mm512_set_ph(
25370 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25371 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25372 31.0, 32.0,
25373 );
25374 let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25375 let e = _mm512_set_epi16(
25376 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25377 25, 26, 27, 28, 29, 30, 31, 32,
25378 );
25379 assert_eq_m512i(r, e);
25380 }
25381
25382 #[simd_test(enable = "avx512fp16")]
25383 unsafe fn test_mm512_mask_cvt_roundph_epi16() {
25384 let a = _mm512_set_ph(
25385 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25386 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25387 31.0, 32.0,
25388 );
25389 let src = _mm512_set_epi16(
25390 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25391 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25392 );
25393 let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25394 src,
25395 0b01010101010101010101010101010101,
25396 a,
25397 );
25398 let e = _mm512_set_epi16(
25399 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25400 24, 34, 26, 36, 28, 38, 30, 40, 32,
25401 );
25402 assert_eq_m512i(r, e);
25403 }
25404
25405 #[simd_test(enable = "avx512fp16")]
25406 unsafe fn test_mm512_maskz_cvt_roundph_epi16() {
25407 let a = _mm512_set_ph(
25408 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25409 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25410 31.0, 32.0,
25411 );
25412 let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25413 0b01010101010101010101010101010101,
25414 a,
25415 );
25416 let e = _mm512_set_epi16(
25417 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25418 0, 28, 0, 30, 0, 32,
25419 );
25420 assert_eq_m512i(r, e);
25421 }
25422
25423 #[simd_test(enable = "avx512fp16,avx512vl")]
25424 unsafe fn test_mm_cvtph_epu16() {
25425 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25426 let r = _mm_cvtph_epu16(a);
25427 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25428 assert_eq_m128i(r, e);
25429 }
25430
25431 #[simd_test(enable = "avx512fp16,avx512vl")]
25432 unsafe fn test_mm_mask_cvtph_epu16() {
25433 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25434 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25435 let r = _mm_mask_cvtph_epu16(src, 0b01010101, a);
25436 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25437 assert_eq_m128i(r, e);
25438 }
25439
25440 #[simd_test(enable = "avx512fp16,avx512vl")]
25441 unsafe fn test_mm_maskz_cvtph_epu16() {
25442 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25443 let r = _mm_maskz_cvtph_epu16(0b01010101, a);
25444 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25445 assert_eq_m128i(r, e);
25446 }
25447
25448 #[simd_test(enable = "avx512fp16,avx512vl")]
25449 unsafe fn test_mm256_cvtph_epu16() {
25450 let a = _mm256_set_ph(
25451 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25452 );
25453 let r = _mm256_cvtph_epu16(a);
25454 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25455 assert_eq_m256i(r, e);
25456 }
25457
25458 #[simd_test(enable = "avx512fp16,avx512vl")]
25459 unsafe fn test_mm256_mask_cvtph_epu16() {
25460 let a = _mm256_set_ph(
25461 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25462 );
25463 let src = _mm256_set_epi16(
25464 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25465 );
25466 let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a);
25467 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25468 assert_eq_m256i(r, e);
25469 }
25470
25471 #[simd_test(enable = "avx512fp16,avx512vl")]
25472 unsafe fn test_mm256_maskz_cvtph_epu16() {
25473 let a = _mm256_set_ph(
25474 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25475 );
25476 let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a);
25477 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25478 assert_eq_m256i(r, e);
25479 }
25480
25481 #[simd_test(enable = "avx512fp16")]
25482 unsafe fn test_mm512_cvtph_epu16() {
25483 let a = _mm512_set_ph(
25484 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25485 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25486 31.0, 32.0,
25487 );
25488 let r = _mm512_cvtph_epu16(a);
25489 let e = _mm512_set_epi16(
25490 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25491 25, 26, 27, 28, 29, 30, 31, 32,
25492 );
25493 assert_eq_m512i(r, e);
25494 }
25495
25496 #[simd_test(enable = "avx512fp16")]
25497 unsafe fn test_mm512_mask_cvtph_epu16() {
25498 let a = _mm512_set_ph(
25499 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25500 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25501 31.0, 32.0,
25502 );
25503 let src = _mm512_set_epi16(
25504 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25505 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25506 );
25507 let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a);
25508 let e = _mm512_set_epi16(
25509 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25510 24, 34, 26, 36, 28, 38, 30, 40, 32,
25511 );
25512 assert_eq_m512i(r, e);
25513 }
25514
25515 #[simd_test(enable = "avx512fp16")]
25516 unsafe fn test_mm512_maskz_cvtph_epu16() {
25517 let a = _mm512_set_ph(
25518 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25519 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25520 31.0, 32.0,
25521 );
25522 let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a);
25523 let e = _mm512_set_epi16(
25524 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25525 0, 28, 0, 30, 0, 32,
25526 );
25527 assert_eq_m512i(r, e);
25528 }
25529
25530 #[simd_test(enable = "avx512fp16")]
25531 unsafe fn test_mm512_cvt_roundph_epu16() {
25532 let a = _mm512_set_ph(
25533 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25534 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25535 31.0, 32.0,
25536 );
25537 let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
25538 let e = _mm512_set_epi16(
25539 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25540 25, 26, 27, 28, 29, 30, 31, 32,
25541 );
25542 assert_eq_m512i(r, e);
25543 }
25544
25545 #[simd_test(enable = "avx512fp16")]
25546 unsafe fn test_mm512_mask_cvt_roundph_epu16() {
25547 let a = _mm512_set_ph(
25548 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25549 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25550 31.0, 32.0,
25551 );
25552 let src = _mm512_set_epi16(
25553 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25554 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25555 );
25556 let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25557 src,
25558 0b01010101010101010101010101010101,
25559 a,
25560 );
25561 let e = _mm512_set_epi16(
25562 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25563 24, 34, 26, 36, 28, 38, 30, 40, 32,
25564 );
25565 assert_eq_m512i(r, e);
25566 }
25567
25568 #[simd_test(enable = "avx512fp16")]
25569 unsafe fn test_mm512_maskz_cvt_roundph_epu16() {
25570 let a = _mm512_set_ph(
25571 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25572 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25573 31.0, 32.0,
25574 );
25575 let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
25576 0b01010101010101010101010101010101,
25577 a,
25578 );
25579 let e = _mm512_set_epi16(
25580 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25581 0, 28, 0, 30, 0, 32,
25582 );
25583 assert_eq_m512i(r, e);
25584 }
25585
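// The `cvtt*` tests below exercise the truncating (round-toward-zero) variants; the
// inputs are whole numbers, so the expected values coincide with the `cvt*` tests above.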
25586 #[simd_test(enable = "avx512fp16,avx512vl")]
25587 unsafe fn test_mm_cvttph_epi16() {
25588 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25589 let r = _mm_cvttph_epi16(a);
25590 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25591 assert_eq_m128i(r, e);
25592 }
25593
25594 #[simd_test(enable = "avx512fp16,avx512vl")]
25595 unsafe fn test_mm_mask_cvttph_epi16() {
25596 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25597 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25598 let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
25599 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25600 assert_eq_m128i(r, e);
25601 }
25602
25603 #[simd_test(enable = "avx512fp16,avx512vl")]
25604 unsafe fn test_mm_maskz_cvttph_epi16() {
25605 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25606 let r = _mm_maskz_cvttph_epi16(0b01010101, a);
25607 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25608 assert_eq_m128i(r, e);
25609 }
25610
25611 #[simd_test(enable = "avx512fp16,avx512vl")]
25612 unsafe fn test_mm256_cvttph_epi16() {
25613 let a = _mm256_set_ph(
25614 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25615 );
25616 let r = _mm256_cvttph_epi16(a);
25617 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25618 assert_eq_m256i(r, e);
25619 }
25620
25621 #[simd_test(enable = "avx512fp16,avx512vl")]
25622 unsafe fn test_mm256_mask_cvttph_epi16() {
25623 let a = _mm256_set_ph(
25624 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25625 );
25626 let src = _mm256_set_epi16(
25627 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25628 );
25629 let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
25630 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25631 assert_eq_m256i(r, e);
25632 }
25633
25634 #[simd_test(enable = "avx512fp16,avx512vl")]
25635 unsafe fn test_mm256_maskz_cvttph_epi16() {
25636 let a = _mm256_set_ph(
25637 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25638 );
25639 let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
25640 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25641 assert_eq_m256i(r, e);
25642 }
25643
25644 #[simd_test(enable = "avx512fp16")]
25645 unsafe fn test_mm512_cvttph_epi16() {
25646 let a = _mm512_set_ph(
25647 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25648 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25649 31.0, 32.0,
25650 );
25651 let r = _mm512_cvttph_epi16(a);
25652 let e = _mm512_set_epi16(
25653 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25654 25, 26, 27, 28, 29, 30, 31, 32,
25655 );
25656 assert_eq_m512i(r, e);
25657 }
25658
25659 #[simd_test(enable = "avx512fp16")]
25660 unsafe fn test_mm512_mask_cvttph_epi16() {
25661 let a = _mm512_set_ph(
25662 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25663 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25664 31.0, 32.0,
25665 );
25666 let src = _mm512_set_epi16(
25667 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25668 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25669 );
25670 let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
25671 let e = _mm512_set_epi16(
25672 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25673 24, 34, 26, 36, 28, 38, 30, 40, 32,
25674 );
25675 assert_eq_m512i(r, e);
25676 }
25677
25678 #[simd_test(enable = "avx512fp16")]
25679 unsafe fn test_mm512_maskz_cvttph_epi16() {
25680 let a = _mm512_set_ph(
25681 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25682 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25683 31.0, 32.0,
25684 );
25685 let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
25686 let e = _mm512_set_epi16(
25687 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25688 0, 28, 0, 30, 0, 32,
25689 );
25690 assert_eq_m512i(r, e);
25691 }
25692
25693 #[simd_test(enable = "avx512fp16")]
25694 unsafe fn test_mm512_cvtt_roundph_epi16() {
25695 let a = _mm512_set_ph(
25696 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25697 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25698 31.0, 32.0,
25699 );
25700 let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
25701 let e = _mm512_set_epi16(
25702 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25703 25, 26, 27, 28, 29, 30, 31, 32,
25704 );
25705 assert_eq_m512i(r, e);
25706 }
25707
25708 #[simd_test(enable = "avx512fp16")]
25709 unsafe fn test_mm512_mask_cvtt_roundph_epi16() {
25710 let a = _mm512_set_ph(
25711 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25712 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25713 31.0, 32.0,
25714 );
25715 let src = _mm512_set_epi16(
25716 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25717 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25718 );
25719 let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25720 src,
25721 0b01010101010101010101010101010101,
25722 a,
25723 );
25724 let e = _mm512_set_epi16(
25725 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25726 24, 34, 26, 36, 28, 38, 30, 40, 32,
25727 );
25728 assert_eq_m512i(r, e);
25729 }
25730
25731 #[simd_test(enable = "avx512fp16")]
25732 unsafe fn test_mm512_maskz_cvtt_roundph_epi16() {
25733 let a = _mm512_set_ph(
25734 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25735 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25736 31.0, 32.0,
25737 );
25738 let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25739 0b01010101010101010101010101010101,
25740 a,
25741 );
25742 let e = _mm512_set_epi16(
25743 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25744 0, 28, 0, 30, 0, 32,
25745 );
25746 assert_eq_m512i(r, e);
25747 }
25748
25749 #[simd_test(enable = "avx512fp16,avx512vl")]
25750 unsafe fn test_mm_cvttph_epu16() {
25751 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25752 let r = _mm_cvttph_epu16(a);
25753 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25754 assert_eq_m128i(r, e);
25755 }
25756
25757 #[simd_test(enable = "avx512fp16,avx512vl")]
25758 unsafe fn test_mm_mask_cvttph_epu16() {
25759 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25760 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25761 let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
25762 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25763 assert_eq_m128i(r, e);
25764 }
25765
25766 #[simd_test(enable = "avx512fp16,avx512vl")]
25767 unsafe fn test_mm_maskz_cvttph_epu16() {
25768 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25769 let r = _mm_maskz_cvttph_epu16(0b01010101, a);
25770 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25771 assert_eq_m128i(r, e);
25772 }
25773
25774 #[simd_test(enable = "avx512fp16,avx512vl")]
25775 unsafe fn test_mm256_cvttph_epu16() {
25776 let a = _mm256_set_ph(
25777 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25778 );
25779 let r = _mm256_cvttph_epu16(a);
25780 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25781 assert_eq_m256i(r, e);
25782 }
25783
25784 #[simd_test(enable = "avx512fp16,avx512vl")]
25785 unsafe fn test_mm256_mask_cvttph_epu16() {
25786 let a = _mm256_set_ph(
25787 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25788 );
25789 let src = _mm256_set_epi16(
25790 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25791 );
25792 let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a);
25793 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25794 assert_eq_m256i(r, e);
25795 }
25796
25797 #[simd_test(enable = "avx512fp16,avx512vl")]
25798 unsafe fn test_mm256_maskz_cvttph_epu16() {
25799 let a = _mm256_set_ph(
25800 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25801 );
25802 let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a);
25803 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25804 assert_eq_m256i(r, e);
25805 }
25806
25807 #[simd_test(enable = "avx512fp16")]
25808 unsafe fn test_mm512_cvttph_epu16() {
25809 let a = _mm512_set_ph(
25810 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25811 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25812 31.0, 32.0,
25813 );
25814 let r = _mm512_cvttph_epu16(a);
25815 let e = _mm512_set_epi16(
25816 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25817 25, 26, 27, 28, 29, 30, 31, 32,
25818 );
25819 assert_eq_m512i(r, e);
25820 }
25821
25822 #[simd_test(enable = "avx512fp16")]
25823 unsafe fn test_mm512_mask_cvttph_epu16() {
25824 let a = _mm512_set_ph(
25825 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25826 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25827 31.0, 32.0,
25828 );
25829 let src = _mm512_set_epi16(
25830 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25831 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25832 );
25833 let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a);
25834 let e = _mm512_set_epi16(
25835 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25836 24, 34, 26, 36, 28, 38, 30, 40, 32,
25837 );
25838 assert_eq_m512i(r, e);
25839 }
25840
25841 #[simd_test(enable = "avx512fp16")]
25842 unsafe fn test_mm512_maskz_cvttph_epu16() {
25843 let a = _mm512_set_ph(
25844 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25845 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25846 31.0, 32.0,
25847 );
25848 let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a);
25849 let e = _mm512_set_epi16(
25850 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25851 0, 28, 0, 30, 0, 32,
25852 );
25853 assert_eq_m512i(r, e);
25854 }
25855
25856 #[simd_test(enable = "avx512fp16")]
25857 unsafe fn test_mm512_cvtt_roundph_epu16() {
25858 let a = _mm512_set_ph(
25859 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25860 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25861 31.0, 32.0,
25862 );
25863 let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
25864 let e = _mm512_set_epi16(
25865 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25866 25, 26, 27, 28, 29, 30, 31, 32,
25867 );
25868 assert_eq_m512i(r, e);
25869 }
25870
25871 #[simd_test(enable = "avx512fp16")]
25872 unsafe fn test_mm512_mask_cvtt_roundph_epu16() {
25873 let a = _mm512_set_ph(
25874 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25875 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25876 31.0, 32.0,
25877 );
25878 let src = _mm512_set_epi16(
25879 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25880 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25881 );
25882 let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
25883 src,
25884 0b01010101010101010101010101010101,
25885 a,
25886 );
25887 let e = _mm512_set_epi16(
25888 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25889 24, 34, 26, 36, 28, 38, 30, 40, 32,
25890 );
25891 assert_eq_m512i(r, e);
25892 }
25893
25894 #[simd_test(enable = "avx512fp16")]
25895 unsafe fn test_mm512_maskz_cvtt_roundph_epu16() {
25896 let a = _mm512_set_ph(
25897 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25898 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25899 31.0, 32.0,
25900 );
25901 let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
25902 0b01010101010101010101010101010101,
25903 a,
25904 );
25905 let e = _mm512_set_epi16(
25906 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25907 0, 28, 0, 30, 0, 32,
25908 );
25909 assert_eq_m512i(r, e);
25910 }
25911
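// f16 -> 32-bit integer conversions widen: a __m128i result uses only the low four
// f16 lanes of `a`, eight f16 lanes fill a __m256i, and a full __m256h fills a __m512i.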
25912 #[simd_test(enable = "avx512fp16,avx512vl")]
25913 unsafe fn test_mm_cvtph_epi32() {
25914 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25915 let r = _mm_cvtph_epi32(a);
25916 let e = _mm_set_epi32(1, 2, 3, 4);
25917 assert_eq_m128i(r, e);
25918 }
25919
25920 #[simd_test(enable = "avx512fp16,avx512vl")]
25921 unsafe fn test_mm_mask_cvtph_epi32() {
25922 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25923 let src = _mm_set_epi32(10, 11, 12, 13);
25924 let r = _mm_mask_cvtph_epi32(src, 0b0101, a);
25925 let e = _mm_set_epi32(10, 2, 12, 4);
25926 assert_eq_m128i(r, e);
25927 }
25928
25929 #[simd_test(enable = "avx512fp16,avx512vl")]
25930 unsafe fn test_mm_maskz_cvtph_epi32() {
25931 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
25932 let r = _mm_maskz_cvtph_epi32(0b0101, a);
25933 let e = _mm_set_epi32(0, 2, 0, 4);
25934 assert_eq_m128i(r, e);
25935 }
25936
25937 #[simd_test(enable = "avx512fp16,avx512vl")]
25938 unsafe fn test_mm256_cvtph_epi32() {
25939 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25940 let r = _mm256_cvtph_epi32(a);
25941 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
25942 assert_eq_m256i(r, e);
25943 }
25944
25945 #[simd_test(enable = "avx512fp16,avx512vl")]
25946 unsafe fn test_mm256_mask_cvtph_epi32() {
25947 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25948 let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
25949 let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a);
25950 let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
25951 assert_eq_m256i(r, e);
25952 }
25953
25954 #[simd_test(enable = "avx512fp16,avx512vl")]
25955 unsafe fn test_mm256_maskz_cvtph_epi32() {
25956 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25957 let r = _mm256_maskz_cvtph_epi32(0b01010101, a);
25958 let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
25959 assert_eq_m256i(r, e);
25960 }
25961
25962 #[simd_test(enable = "avx512fp16")]
25963 unsafe fn test_mm512_cvtph_epi32() {
25964 let a = _mm256_set_ph(
25965 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25966 );
25967 let r = _mm512_cvtph_epi32(a);
25968 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25969 assert_eq_m512i(r, e);
25970 }
25971
25972 #[simd_test(enable = "avx512fp16")]
25973 unsafe fn test_mm512_mask_cvtph_epi32() {
25974 let a = _mm256_set_ph(
25975 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25976 );
25977 let src = _mm512_set_epi32(
25978 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25979 );
25980 let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a);
25981 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25982 assert_eq_m512i(r, e);
25983 }
25984
25985 #[simd_test(enable = "avx512fp16")]
25986 unsafe fn test_mm512_maskz_cvtph_epi32() {
25987 let a = _mm256_set_ph(
25988 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25989 );
25990 let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a);
25991 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25992 assert_eq_m512i(r, e);
25993 }
25994
25995 #[simd_test(enable = "avx512fp16")]
25996 unsafe fn test_mm512_cvt_roundph_epi32() {
25997 let a = _mm256_set_ph(
25998 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25999 );
26000 let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26001 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
26002 assert_eq_m512i(r, e);
26003 }
26004
26005 #[simd_test(enable = "avx512fp16")]
26006 unsafe fn test_mm512_mask_cvt_roundph_epi32() {
26007 let a = _mm256_set_ph(
26008 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26009 );
26010 let src = _mm512_set_epi32(
26011 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26012 );
26013 let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26014 src,
26015 0b0101010101010101,
26016 a,
26017 );
26018 let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
26019 assert_eq_m512i(r, e);
26020 }
26021
26022 #[simd_test(enable = "avx512fp16")]
26023 unsafe fn test_mm512_maskz_cvt_roundph_epi32() {
26024 let a = _mm256_set_ph(
26025 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26026 );
26027 let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26028 0b0101010101010101,
26029 a,
26030 );
26031 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26032 assert_eq_m512i(r, e);
26033 }
26034
26035 #[simd_test(enable = "avx512fp16")]
26036 unsafe fn test_mm_cvtsh_i32() {
26037 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26038 let r = _mm_cvtsh_i32(a);
26039 assert_eq!(r, 1);
26040 }
26041
26042 #[simd_test(enable = "avx512fp16")]
26043 unsafe fn test_mm_cvt_roundsh_i32() {
26044 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26045 let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26046 assert_eq!(r, 1);
26047 }
26048
26049 #[simd_test(enable = "avx512fp16,avx512vl")]
26050 unsafe fn test_mm_cvtph_epu32() {
26051 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26052 let r = _mm_cvtph_epu32(a);
26053 let e = _mm_set_epi32(1, 2, 3, 4);
26054 assert_eq_m128i(r, e);
26055 }
26056
26057 #[simd_test(enable = "avx512fp16,avx512vl")]
26058 unsafe fn test_mm_mask_cvtph_epu32() {
26059 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26060 let src = _mm_set_epi32(10, 11, 12, 13);
26061 let r = _mm_mask_cvtph_epu32(src, 0b0101, a);
26062 let e = _mm_set_epi32(10, 2, 12, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvtph_epu32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtph_epu32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvtph_epu32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtph_epu32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
        );
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
        );
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_u32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_u32(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_u32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

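    // Note: the `cvtt*` variants below truncate toward zero instead of using a rounding
    // mode, so the integral inputs used here convert to the same values as the
    // round-to-nearest tests above.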
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvttph_epi32(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_epi32(10, 11, 12, 13);
        let r = _mm_mask_cvttph_epi32(src, 0b0101, a);
        let e = _mm_set_epi32(10, 2, 12, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvttph_epi32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvttph_epi32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvttph_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvttph_epi32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_i32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_i32(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_i32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvttph_epu32(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_epi32(10, 11, 12, 13);
        let r = _mm_mask_cvttph_epu32(src, 0b0101, a);
        let e = _mm_set_epi32(10, 2, 12, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvttph_epu32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvttph_epu32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvttph_epu32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvttph_epu32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_u32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_u32(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_u32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }

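    // The 64-bit lane conversions below widen each half-precision element, so the
    // 128-bit and 256-bit forms only read the lowest 2 and 4 elements of `a`,
    // respectively.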
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvtph_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_epi64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvtph_epi64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvtph_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtph_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epi64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvtph_epi64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtph_epi64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtph_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtph_epi64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvtph_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_epu64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvtph_epu64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvtph_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtph_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epu64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvtph_epu64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtph_epu64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtph_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtph_epu64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvttph_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epi64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvttph_epi64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvttph_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvttph_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epi64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvttph_epi64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvttph_epi64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvttph_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvttph_epi64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvttph_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epu64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvttph_epu64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvttph_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvttph_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epu64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvttph_epu64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvttph_epu64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvttph_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvttph_epu64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

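    // `cvtxph_ps` widens f16 to f32; every finite half-precision value is exactly
    // representable as a single-precision float, so these conversions are exact.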
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtxph_ps() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtxph_ps(a);
        let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtxph_ps() {
        let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
        let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtxph_ps() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvtxph_ps(0b0101, a);
        let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtxph_ps() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtxph_ps(a);
        let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtxph_ps() {
        let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtxph_ps() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvtxph_ps(0b01010101, a);
        let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtxph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtxph_ps(a);
        let e = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtxph_ps() {
        let src = _mm512_set_ps(
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
            24.0, 25.0,
        );
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a);
        let e = _mm512_set_ps(
            10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtxph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a);
        let e = _mm512_set_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtx_roundph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtx_roundph_ps() {
        let src = _mm512_set_ps(
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
            24.0, 25.0,
        );
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
        let e = _mm512_set_ps(
            10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtx_roundph_ps() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
        let e = _mm512_set_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

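    // The scalar `cvtsh_ss` conversions write only the lowest f32 lane (converted from
    // the lowest f16 element of `b`); the remaining lanes are copied from `a`. With a
    // clear mask bit the lowest lane comes from `src` (masked) or is zeroed (maskz).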
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvtsh_ss(a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_mask_cvtsh_ss() {
        let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvtsh_ss(src, 0, a, b);
        let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_mask_cvtsh_ss(src, 1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_maskz_cvtsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvtsh_ss(0, a, b);
        let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsh_ss(1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_mask_cvt_roundsh_ss() {
        let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b);
        let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_maskz_cvt_roundsh_ss() {
        let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b);
        let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b);
        let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
        assert_eq_m128(r, e);
    }

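    // f16 to f64 widening is likewise exact; as with the 64-bit integer conversions, the
    // 128-bit and 256-bit forms read only the lowest 2 and 4 elements of `a`.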
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvtph_pd(a);
        let e = _mm_set_pd(1.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_pd() {
        let src = _mm_set_pd(10.0, 11.0);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvtph_pd(src, 0b01, a);
        let e = _mm_set_pd(10.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvtph_pd(0b01, a);
        let e = _mm_set_pd(0.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtph_pd(a);
        let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_pd() {
        let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvtph_pd(src, 0b0101, a);
        let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_pd() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtph_pd(0b0101, a);
        let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtph_pd(a);
        let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_pd() {
        let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtph_pd(src, 0b01010101, a);
        let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtph_pd(0b01010101, a);
        let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_pd() {
        let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_pd() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a);
        let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvtsh_sd(a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_mask_cvtsh_sd() {
        let src = _mm_setr_pd(3.0, 11.0);
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvtsh_sd(src, 0, a, b);
        let e = _mm_setr_pd(3.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_mask_cvtsh_sd(src, 1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_maskz_cvtsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvtsh_sd(0, a, b);
        let e = _mm_setr_pd(0.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtsh_sd(1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_mask_cvt_roundsh_sd() {
        let src = _mm_setr_pd(3.0, 11.0);
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b);
        let e = _mm_setr_pd(3.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_maskz_cvt_roundsh_sd() {
        let a = _mm_setr_pd(2.0, 20.0);
        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b);
        let e = _mm_setr_pd(0.0, 20.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b);
        let e = _mm_setr_pd(1.0, 20.0);
        assert_eq_m128d(r, e);
    }

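    // `cvtsh_h` simply returns the lowest half-precision element of the vector.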
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_h() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_h(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm256_cvtsh_h() {
        let a = _mm256_setr_ph(
            1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm256_cvtsh_h(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtsh_h() {
        let a = _mm512_setr_ph(
            1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvtsh_h(a);
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsi128_si16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm_cvtsi128_si16(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsi16_si128() {
        let a = 1;
        let r = _mm_cvtsi16_si128(a);
        let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
}