1use crate::arch::asm;
2use crate::core_arch::{simd::*, x86::*};
3use crate::intrinsics::{fmaf16, simd::*};
4use crate::ptr;
5
6/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
7///
8/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ph)
9#[inline]
10#[target_feature(enable = "avx512fp16")]
11#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12pub fn _mm_set_ph(
13 e7: f16,
14 e6: f16,
15 e5: f16,
16 e4: f16,
17 e3: f16,
18 e2: f16,
19 e1: f16,
20 e0: f16,
21) -> __m128h {
22 __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
23}
24
25/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
26///
27/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ph)
28#[inline]
29#[target_feature(enable = "avx512fp16")]
30#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
31pub fn _mm256_set_ph(
32 e15: f16,
33 e14: f16,
34 e13: f16,
35 e12: f16,
36 e11: f16,
37 e10: f16,
38 e9: f16,
39 e8: f16,
40 e7: f16,
41 e6: f16,
42 e5: f16,
43 e4: f16,
44 e3: f16,
45 e2: f16,
46 e1: f16,
47 e0: f16,
48) -> __m256h {
49 __m256h([
50 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
51 ])
52}
53
54/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values.
55///
56/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ph)
57#[inline]
58#[target_feature(enable = "avx512fp16")]
59#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
60pub fn _mm512_set_ph(
61 e31: f16,
62 e30: f16,
63 e29: f16,
64 e28: f16,
65 e27: f16,
66 e26: f16,
67 e25: f16,
68 e24: f16,
69 e23: f16,
70 e22: f16,
71 e21: f16,
72 e20: f16,
73 e19: f16,
74 e18: f16,
75 e17: f16,
76 e16: f16,
77 e15: f16,
78 e14: f16,
79 e13: f16,
80 e12: f16,
81 e11: f16,
82 e10: f16,
83 e9: f16,
84 e8: f16,
85 e7: f16,
86 e6: f16,
87 e5: f16,
88 e4: f16,
89 e3: f16,
90 e2: f16,
91 e1: f16,
92 e0: f16,
93) -> __m512h {
94 __m512h([
95 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
96 e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
97 ])
98}
99
100/// Copy half-precision (16-bit) floating-point elements from a to the lower element of dst and zero
101/// the upper 7 elements.
102///
103/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sh)
104#[inline]
105#[target_feature(enable = "avx512fp16")]
106#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
107pub fn _mm_set_sh(a: f16) -> __m128h {
108 __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
109}
110
111/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
112///
113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ph)
114#[inline]
115#[target_feature(enable = "avx512fp16")]
116#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
117pub fn _mm_set1_ph(a: f16) -> __m128h {
118 unsafe { transmute(src:f16x8::splat(a)) }
119}
120
121/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
122///
123/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ph)
124#[inline]
125#[target_feature(enable = "avx512fp16")]
126#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
127pub fn _mm256_set1_ph(a: f16) -> __m256h {
128 unsafe { transmute(src:f16x16::splat(a)) }
129}
130
131/// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst.
132///
133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ph)
134#[inline]
135#[target_feature(enable = "avx512fp16")]
136#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
137pub fn _mm512_set1_ph(a: f16) -> __m512h {
138 unsafe { transmute(src:f16x32::splat(a)) }
139}
140
141/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
142///
143/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ph)
144#[inline]
145#[target_feature(enable = "avx512fp16")]
146#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
147pub fn _mm_setr_ph(
148 e0: f16,
149 e1: f16,
150 e2: f16,
151 e3: f16,
152 e4: f16,
153 e5: f16,
154 e6: f16,
155 e7: f16,
156) -> __m128h {
157 __m128h([e0, e1, e2, e3, e4, e5, e6, e7])
158}
159
160/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
161///
162/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ph)
163#[inline]
164#[target_feature(enable = "avx512fp16")]
165#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
166pub fn _mm256_setr_ph(
167 e0: f16,
168 e1: f16,
169 e2: f16,
170 e3: f16,
171 e4: f16,
172 e5: f16,
173 e6: f16,
174 e7: f16,
175 e8: f16,
176 e9: f16,
177 e10: f16,
178 e11: f16,
179 e12: f16,
180 e13: f16,
181 e14: f16,
182 e15: f16,
183) -> __m256h {
184 __m256h([
185 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
186 ])
187}
188
189/// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order.
190///
191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ph)
192#[inline]
193#[target_feature(enable = "avx512fp16")]
194#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
195pub fn _mm512_setr_ph(
196 e0: f16,
197 e1: f16,
198 e2: f16,
199 e3: f16,
200 e4: f16,
201 e5: f16,
202 e6: f16,
203 e7: f16,
204 e8: f16,
205 e9: f16,
206 e10: f16,
207 e11: f16,
208 e12: f16,
209 e13: f16,
210 e14: f16,
211 e15: f16,
212 e16: f16,
213 e17: f16,
214 e18: f16,
215 e19: f16,
216 e20: f16,
217 e21: f16,
218 e22: f16,
219 e23: f16,
220 e24: f16,
221 e25: f16,
222 e26: f16,
223 e27: f16,
224 e28: f16,
225 e29: f16,
226 e30: f16,
227 e31: f16,
228) -> __m512h {
229 __m512h([
230 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
231 e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
232 ])
233}
234
235/// Return vector of type __m128h with all elements set to zero.
236///
237/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
238#[inline]
239#[target_feature(enable = "avx512fp16,avx512vl")]
240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
241pub fn _mm_setzero_ph() -> __m128h {
242 unsafe { transmute(src:f16x8::ZERO) }
243}
244
245/// Return vector of type __m256h with all elements set to zero.
246///
247/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
248#[inline]
249#[target_feature(enable = "avx512fp16,avx512vl")]
250#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
251pub fn _mm256_setzero_ph() -> __m256h {
252 f16x16::ZERO.as_m256h()
253}
254
255/// Return vector of type __m512h with all elements set to zero.
256///
257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
258#[inline]
259#[target_feature(enable = "avx512fp16")]
260#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
261pub fn _mm512_setzero_ph() -> __m512h {
262 f16x32::ZERO.as_m512h()
263}
264
265/// Return vector of type `__m128h` with indetermination elements.
266/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
267/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
268/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
269///
270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
271#[inline]
272#[target_feature(enable = "avx512fp16,avx512vl")]
273#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
274pub fn _mm_undefined_ph() -> __m128h {
275 f16x8::ZERO.as_m128h()
276}
277
278/// Return vector of type `__m256h` with indetermination elements.
279/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
280/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
281/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
282///
283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
284#[inline]
285#[target_feature(enable = "avx512fp16,avx512vl")]
286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
287pub fn _mm256_undefined_ph() -> __m256h {
288 f16x16::ZERO.as_m256h()
289}
290
291/// Return vector of type `__m512h` with indetermination elements.
292/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
293/// picks some valid value and is not equivalent to [`mem::MaybeUninit`](crate::mem::MaybeUninit).
294/// In practice, this is typically equivalent to [`mem::zeroed`](crate::mem::zeroed).
295///
296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
297#[inline]
298#[target_feature(enable = "avx512fp16")]
299#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
300pub fn _mm512_undefined_ph() -> __m512h {
301 f16x32::ZERO.as_m512h()
302}
303
304/// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and
305/// does not generate any instructions, thus it has zero latency.
306///
307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
308#[inline]
309#[target_feature(enable = "avx512fp16")]
310#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
311pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
312 unsafe { transmute(src:a) }
313}
314
315/// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and
316/// does not generate any instructions, thus it has zero latency.
317///
318/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
319#[inline]
320#[target_feature(enable = "avx512fp16")]
321#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
322pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
323 unsafe { transmute(src:a) }
324}
325
326/// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and
327/// does not generate any instructions, thus it has zero latency.
328///
329/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
330#[inline]
331#[target_feature(enable = "avx512fp16")]
332#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
333pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
334 unsafe { transmute(src:a) }
335}
336
337/// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and
338/// does not generate any instructions, thus it has zero latency.
339///
340/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
341#[inline]
342#[target_feature(enable = "avx512fp16")]
343#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
344pub fn _mm_castph_pd(a: __m128h) -> __m128d {
345 unsafe { transmute(src:a) }
346}
347
348/// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and
349/// does not generate any instructions, thus it has zero latency.
350///
351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
352#[inline]
353#[target_feature(enable = "avx512fp16")]
354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
355pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
356 unsafe { transmute(src:a) }
357}
358
359/// Cast vector of type `__m512h` to type `__m512d`. This intrinsic is only used for compilation and
360/// does not generate any instructions, thus it has zero latency.
361///
362/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
363#[inline]
364#[target_feature(enable = "avx512fp16")]
365#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
366pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
367 unsafe { transmute(src:a) }
368}
369
370/// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and
371/// does not generate any instructions, thus it has zero latency.
372///
373/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
374#[inline]
375#[target_feature(enable = "avx512fp16")]
376#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
377pub fn _mm_castps_ph(a: __m128) -> __m128h {
378 unsafe { transmute(src:a) }
379}
380
381/// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and
382/// does not generate any instructions, thus it has zero latency.
383///
384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
385#[inline]
386#[target_feature(enable = "avx512fp16")]
387#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
388pub fn _mm256_castps_ph(a: __m256) -> __m256h {
389 unsafe { transmute(src:a) }
390}
391
392/// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and
393/// does not generate any instructions, thus it has zero latency.
394///
395/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
396#[inline]
397#[target_feature(enable = "avx512fp16")]
398#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
399pub fn _mm512_castps_ph(a: __m512) -> __m512h {
400 unsafe { transmute(src:a) }
401}
402
403/// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and
404/// does not generate any instructions, thus it has zero latency.
405///
406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
407#[inline]
408#[target_feature(enable = "avx512fp16")]
409#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
410pub fn _mm_castph_ps(a: __m128h) -> __m128 {
411 unsafe { transmute(src:a) }
412}
413
414/// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and
415/// does not generate any instructions, thus it has zero latency.
416///
417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
418#[inline]
419#[target_feature(enable = "avx512fp16")]
420#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
421pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
422 unsafe { transmute(src:a) }
423}
424
425/// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and
426/// does not generate any instructions, thus it has zero latency.
427///
428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
429#[inline]
430#[target_feature(enable = "avx512fp16")]
431#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
432pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
433 unsafe { transmute(src:a) }
434}
435
436/// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and
437/// does not generate any instructions, thus it has zero latency.
438///
439/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
440#[inline]
441#[target_feature(enable = "avx512fp16")]
442#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
443pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
444 unsafe { transmute(src:a) }
445}
446
447/// Cast vector of type `__m256i` to type `__m256h`. This intrinsic is only used for compilation and
448/// does not generate any instructions, thus it has zero latency.
449///
450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
451#[inline]
452#[target_feature(enable = "avx512fp16")]
453#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
454pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
455 unsafe { transmute(src:a) }
456}
457
458/// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and
459/// does not generate any instructions, thus it has zero latency.
460///
461/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
462#[inline]
463#[target_feature(enable = "avx512fp16")]
464#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
465pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
466 unsafe { transmute(src:a) }
467}
468
469/// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and
470/// does not generate any instructions, thus it has zero latency.
471///
472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
473#[inline]
474#[target_feature(enable = "avx512fp16")]
475#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
476pub fn _mm_castph_si128(a: __m128h) -> __m128i {
477 unsafe { transmute(src:a) }
478}
479
480/// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and
481/// does not generate any instructions, thus it has zero latency.
482///
483/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
484#[inline]
485#[target_feature(enable = "avx512fp16")]
486#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
487pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
488 unsafe { transmute(src:a) }
489}
490
491/// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and
492/// does not generate any instructions, thus it has zero latency.
493///
494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
495#[inline]
496#[target_feature(enable = "avx512fp16")]
497#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
498pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
499 unsafe { transmute(src:a) }
500}
501
/// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
    // Keep only the low 8 lanes of `a`; the upper half is discarded.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
512
/// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
    // Keep only the low 8 lanes of `a`; the upper 24 lanes are discarded.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
523
/// Cast vector of type `__m512h` to type `__m256h`. This intrinsic is only used for compilation and
/// does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
    // Keep only the low 16 lanes of `a`; the upper 16 lanes are discarded.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
}
534
/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        // Indices 0-7 select the lanes of `a`; index 8 selects lane 0 of the
        // second operand (the "undefined" vector), filling the upper half.
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}
552
/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        // Indices 0-7 select the lanes of `a`; index 8 selects lane 0 of the
        // second operand (the "undefined" vector), filling the upper 24 lanes.
        simd_shuffle!(
            a,
            _mm_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}
573
/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined.
/// In practice, the upper elements are zeroed. This intrinsic can generate the `vzeroupper` instruction,
/// but most of the time it does not generate any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        // Indices 0-15 select the lanes of `a`; index 16 selects lane 0 of the
        // second operand (the "undefined" vector), filling the upper 16 lanes.
        simd_shuffle!(
            a,
            _mm256_undefined_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}
594
/// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
    unsafe {
        // Indices 0-7 select the lanes of `a`; index 8 selects lane 0 of the
        // zero vector, guaranteeing a zeroed upper half.
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        )
    }
}
612
/// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
    unsafe {
        // Indices 0-15 select the lanes of `a`; index 16 selects lane 0 of the
        // zero vector, guaranteeing a zeroed upper half.
        simd_shuffle!(
            a,
            _mm256_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16,
                16, 16, 16, 16, 16, 16, 16, 16, 16
            ]
        )
    }
}
633
/// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed.
/// This intrinsic can generate the `vzeroupper` instruction, but most of the time it does not generate
/// any instructions.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
    unsafe {
        // Indices 0-7 select the lanes of `a`; index 8 selects lane 0 of the
        // zero vector, guaranteeing the upper 24 lanes are zeroed.
        simd_shuffle!(
            a,
            _mm_setzero_ph(),
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                8, 8, 8, 8
            ]
        )
    }
}
654
// Expands to an inline-asm `vcmpph` comparison of `$a` and `$b`, returning the
// resulting opmask as `$mask_type`. The comparison predicate is taken from a
// const `IMM5` that must be in scope at the expansion site. The first arm is
// the unmasked form; the second arm applies zeroing mask `$mask` so that
// result bits whose mask bit is clear come back as zero.
macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            // pure + nomem: the comparison reads only its register inputs,
            // so the compiler may CSE or reorder the asm block.
            options(pure, nomem, nostack)
        );
        dst
    }};
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
        let dst: $mask_type;
        asm!(
            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            a = in($reg) $a,
            b = in($reg) $b,
            imm8 = const IMM5,
            options(pure, nomem, nostack)
        );
        dst
    }};
}
682
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        // IMM5 is the vcmpph predicate; only values 0..=31 are valid.
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, xmm_reg, a, b)
    }
}
697
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    unsafe {
        // IMM5 is the vcmpph predicate; only values 0..=31 are valid.
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask8, k1, xmm_reg, a, b)
    }
}
713
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
    unsafe {
        // IMM5 is the vcmpph predicate; only values 0..=31 are valid.
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, ymm_reg, a, b)
    }
}
728
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask16,
    a: __m256h,
    b: __m256h,
) -> __mmask16 {
    unsafe {
        // IMM5 is the vcmpph predicate; only values 0..=31 are valid.
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
    }
}
748
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
    unsafe {
        // IMM5 is the vcmpph predicate; only values 0..=31 are valid.
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, zmm_reg, a, b)
    }
}
763
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        // IMM5 is the vcmpph predicate; only values 0..=31 are valid.
        static_assert_uimm_bits!(IMM5, 5);
        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
    }
}
783
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        // IMM5 is the vcmpph predicate (0..=31); SAE must be a valid
        // suppress-all-exceptions constant.
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            // Emit vcmpph with the {sae} modifier to suppress FP exceptions.
            let dst: __mmask32;
            asm!(
                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            // Without SAE this is the plain packed comparison.
            cmp_asm!(__mmask32, zmm_reg, a, b)
        }
    }
}
817
/// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the results in mask vector k using zeromask k (elements are
/// zeroed out when the corresponding mask bit is not set).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask32,
    a: __m512h,
    b: __m512h,
) -> __mmask32 {
    unsafe {
        // IMM5 is the vcmpph predicate (0..=31); SAE must be a valid
        // suppress-all-exceptions constant.
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        if SAE == _MM_FROUND_NO_EXC {
            // Emit vcmpph with zeroing mask k1 and the {sae} modifier.
            let dst: __mmask32;
            asm!(
                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
                k = lateout(kreg) dst,
                k1 = in(kreg) k1,
                a = in(zmm_reg) a,
                b = in(zmm_reg) b,
                imm8 = const IMM5,
                options(pure, nomem, nostack)
            );
            dst
        } else {
            // Without SAE, fall back to the masked packed comparison.
            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
        }
    }
}
854
855/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
856/// operand specified by imm8, and store the result in mask vector k. Exceptions can be suppressed by
857/// passing _MM_FROUND_NO_EXC in the sae parameter.
858///
859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sh_mask)
860#[inline]
861#[target_feature(enable = "avx512fp16")]
862#[rustc_legacy_const_generics(2, 3)]
863#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
864pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
865 static_assert_uimm_bits!(IMM5, 5);
866 static_assert_sae!(SAE);
867 _mm_mask_cmp_round_sh_mask::<IMM5, SAE>(k1:0xff, a, b)
868}
869
870/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
871/// operand specified by imm8, and store the result in mask vector k using zeromask k1. Exceptions can be
872/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
873///
874/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sh_mask)
875#[inline]
876#[target_feature(enable = "avx512fp16")]
877#[rustc_legacy_const_generics(3, 4)]
878#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
879pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
880 k1: __mmask8,
881 a: __m128h,
882 b: __m128h,
883) -> __mmask8 {
884 unsafe {
885 static_assert_uimm_bits!(IMM5, 5);
886 static_assert_sae!(SAE);
887 vcmpsh(a, b, IMM5, mask:k1, SAE)
888 }
889}
890
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    // Non-rounding variant: use the current MXCSR exception behavior.
    _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}
903
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and store the result in mask vector k using zeromask k1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sh_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
    static_assert_uimm_bits!(IMM5, 5);
    // Non-rounding variant: use the current MXCSR exception behavior.
    _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
}
916
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2, 3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_sae!(SAE);
        // `vcomish` compares the low f16 lanes and returns 0 or 1.
        vcomish(a, b, IMM5, SAE)
    }
}
933
/// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison
/// operand specified by imm8, and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
    static_assert_uimm_bits!(IMM5, 5);
    // Non-rounding variant: use the current MXCSR exception behavior.
    _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
}
946
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
    // Ordered, signaling (`_OS`) predicate; `_mm_ucomieq_sh` uses the quiet `_OQ` form.
    _mm_comi_sh::<_CMP_EQ_OS>(a, b)
}
957
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
    // Ordered, signaling (`_OS`) greater-or-equal predicate.
    _mm_comi_sh::<_CMP_GE_OS>(a, b)
}
968
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
    // Ordered, signaling (`_OS`) greater-than predicate.
    _mm_comi_sh::<_CMP_GT_OS>(a, b)
}
979
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
    // Ordered, signaling (`_OS`) less-or-equal predicate.
    _mm_comi_sh::<_CMP_LE_OS>(a, b)
}
990
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
    // Ordered, signaling (`_OS`) less-than predicate.
    _mm_comi_sh::<_CMP_LT_OS>(a, b)
}
1001
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
    // Ordered, signaling (`_OS`) not-equal predicate.
    _mm_comi_sh::<_CMP_NEQ_OS>(a, b)
}
1012
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
    // Quiet (`_OQ`) predicate: no exception on QNaN inputs, per the doc above.
    _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
}
1023
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than-or-equal,
/// and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
    // Quiet (`_OQ`) greater-or-equal predicate.
    _mm_comi_sh::<_CMP_GE_OQ>(a, b)
}
1034
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for greater-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
    // Quiet (`_OQ`) greater-than predicate.
    _mm_comi_sh::<_CMP_GT_OQ>(a, b)
}
1045
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than-or-equal, and
/// return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
    // Quiet (`_OQ`) less-or-equal predicate.
    _mm_comi_sh::<_CMP_LE_OQ>(a, b)
}
1056
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for less-than, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
    // Quiet (`_OQ`) less-than predicate.
    _mm_comi_sh::<_CMP_LT_OQ>(a, b)
}
1067
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for not-equal, and return
/// the boolean result (0 or 1). This instruction will not signal an exception for QNaNs.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
    // Quiet (`_OQ`) not-equal predicate.
    _mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
}
1078
/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
    // Aligned load: dereference the pointer reinterpreted as `*const __m128h`.
    *mem_addr.cast()
}
1089
/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
    // Aligned load: dereference the pointer reinterpreted as `*const __m256h`.
    *mem_addr.cast()
}
1100
/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
/// a new vector. The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
    // Aligned load: dereference the pointer reinterpreted as `*const __m512h`.
    *mem_addr.cast()
}
1111
/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector,
/// and zero the upper elements
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
    // Read the scalar, then build a vector with it in lane 0 and zeros elsewhere.
    _mm_set_sh(*mem_addr)
}
1122
/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using writemask k (the element is copied from src when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_load_sh(src: __m128h, k: __mmask8, mem_addr: *const f16) -> __m128h {
    // Seed `dst` with `src` so the merge-masked `vmovsh` keeps src's low lane when k[0] == 0.
    let mut dst: __m128h = src;
    asm!(
        // NOTE(review): `vpl!` appears to append the memory-operand (`{p}`) suffix — confirm against its definition.
        vpl!("vmovsh {dst}{{{k}}}"),
        dst = inout(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}
1141
/// Load a half-precision (16-bit) floating-point element from memory into the lower element of a new vector
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and zero the upper elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
    let mut dst: __m128h;
    asm!(
        // `{z}` selects zero-masking, so no source operand is needed.
        vpl!("vmovsh {dst}{{{k}}}{{z}}"),
        dst = out(xmm_reg) dst,
        k = in(kreg) k,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags)
    );
    dst
}
1160
1161/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
1162/// a new vector. The address does not need to be aligned to any particular boundary.
1163///
1164/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ph)
1165#[inline]
1166#[target_feature(enable = "avx512fp16,avx512vl")]
1167#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1168pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
1169 ptr::read_unaligned(src:mem_addr.cast())
1170}
1171
1172/// Load 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from memory into
1173/// a new vector. The address does not need to be aligned to any particular boundary.
1174///
1175/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ph)
1176#[inline]
1177#[target_feature(enable = "avx512fp16,avx512vl")]
1178#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1179pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
1180 ptr::read_unaligned(src:mem_addr.cast())
1181}
1182
1183/// Load 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from memory into
1184/// a new vector. The address does not need to be aligned to any particular boundary.
1185///
1186/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ph)
1187#[inline]
1188#[target_feature(enable = "avx512fp16")]
1189#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1190pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
1191 ptr::read_unaligned(src:mem_addr.cast())
1192}
1193
1194/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
1195/// using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper
1196/// 7 packed elements from a to the upper elements of dst.
1197///
1198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
1199#[inline]
1200#[target_feature(enable = "avx512fp16")]
1201#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1202pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1203 unsafe {
1204 let mut mov: f16 = simd_extract!(src, 0);
1205 if (k & 1) != 0 {
1206 mov = simd_extract!(b, 0);
1207 }
1208 simd_insert!(a, 0, mov)
1209 }
1210}
1211
1212/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst
1213/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
1214/// elements from a to the upper elements of dst.
1215///
1216/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
1217#[inline]
1218#[target_feature(enable = "avx512fp16")]
1219#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1220pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1221 unsafe {
1222 let mut mov: f16 = 0.;
1223 if (k & 1) != 0 {
1224 mov = simd_extract!(b, 0);
1225 }
1226 simd_insert!(a, 0, mov)
1227 }
1228}
1229
1230/// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst,
1231/// and copy the upper 7 packed elements from a to the upper elements of dst.
1232///
1233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
1234#[inline]
1235#[target_feature(enable = "avx512fp16")]
1236#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1237pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
1238 unsafe {
1239 let mov: f16 = simd_extract!(b, 0);
1240 simd_insert!(a, 0, mov)
1241 }
1242}
1243
/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 16 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
    // Aligned store: write through the pointer reinterpreted as `*mut __m128h`.
    *mem_addr.cast() = a;
}
1254
/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 32 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
    // Aligned store: write through the pointer reinterpreted as `*mut __m256h`.
    *mem_addr.cast() = a;
}
1265
/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
/// The address must be aligned to 64 bytes or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
    // Aligned store: write through the pointer reinterpreted as `*mut __m512h`.
    *mem_addr.cast() = a;
}
1276
/// Store the lower half-precision (16-bit) floating-point element from a into memory.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
    // Only lane 0 of `a` is written; the upper lanes are ignored.
    *mem_addr = simd_extract!(a, 0);
}
1286
/// Store the lower half-precision (16-bit) floating-point element from a into memory using writemask k
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
    asm!(
        // Masked 16-bit-element store; per the documented contract only mask bit 0
        // is meaningful here. NOTE(review): `vps!` appears to build the
        // "mnemonic ptr [p]{k}, src" template — confirm against its definition.
        vps!("vmovdqu16", "{{{k}}}, {src}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        src = in(xmm_reg) a,
        options(nostack, preserves_flags)
    );
}
1302
1303/// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory.
1304/// The address does not need to be aligned to any particular boundary.
1305///
1306/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ph)
1307#[inline]
1308#[target_feature(enable = "avx512fp16,avx512vl")]
1309#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1310pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
1311 ptr::write_unaligned(dst:mem_addr.cast(), src:a);
1312}
1313
1314/// Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from a into memory.
1315/// The address does not need to be aligned to any particular boundary.
1316///
1317/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ph)
1318#[inline]
1319#[target_feature(enable = "avx512fp16,avx512vl")]
1320#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1321pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
1322 ptr::write_unaligned(dst:mem_addr.cast(), src:a);
1323}
1324
1325/// Store 512-bits (composed of 32 packed half-precision (16-bit) floating-point elements) from a into memory.
1326/// The address does not need to be aligned to any particular boundary.
1327///
1328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ph)
1329#[inline]
1330#[target_feature(enable = "avx512fp16")]
1331#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1332pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
1333 ptr::write_unaligned(dst:mem_addr.cast(), src:a);
1334}
1335
1336/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1337///
1338/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ph)
1339#[inline]
1340#[target_feature(enable = "avx512fp16,avx512vl")]
1341#[cfg_attr(test, assert_instr(vaddph))]
1342#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1343pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
1344 unsafe { simd_add(x:a, y:b) }
1345}
1346
1347/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1348/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1349///
1350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ph)
1351#[inline]
1352#[target_feature(enable = "avx512fp16,avx512vl")]
1353#[cfg_attr(test, assert_instr(vaddph))]
1354#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1355pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1356 unsafe {
1357 let r: __m128h = _mm_add_ph(a, b);
1358 simd_select_bitmask(m:k, yes:r, no:src)
1359 }
1360}
1361
1362/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1363/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1364///
1365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ph)
1366#[inline]
1367#[target_feature(enable = "avx512fp16,avx512vl")]
1368#[cfg_attr(test, assert_instr(vaddph))]
1369#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1370pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1371 unsafe {
1372 let r: __m128h = _mm_add_ph(a, b);
1373 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1374 }
1375}
1376
1377/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1378///
1379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ph)
1380#[inline]
1381#[target_feature(enable = "avx512fp16,avx512vl")]
1382#[cfg_attr(test, assert_instr(vaddph))]
1383#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1384pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
1385 unsafe { simd_add(x:a, y:b) }
1386}
1387
1388/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1389/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1390///
1391/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ph)
1392#[inline]
1393#[target_feature(enable = "avx512fp16,avx512vl")]
1394#[cfg_attr(test, assert_instr(vaddph))]
1395#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1396pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1397 unsafe {
1398 let r: __m256h = _mm256_add_ph(a, b);
1399 simd_select_bitmask(m:k, yes:r, no:src)
1400 }
1401}
1402
1403/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1404/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1405///
1406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ph)
1407#[inline]
1408#[target_feature(enable = "avx512fp16,avx512vl")]
1409#[cfg_attr(test, assert_instr(vaddph))]
1410#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1411pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1412 unsafe {
1413 let r: __m256h = _mm256_add_ph(a, b);
1414 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
1415 }
1416}
1417
1418/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1419///
1420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ph)
1421#[inline]
1422#[target_feature(enable = "avx512fp16")]
1423#[cfg_attr(test, assert_instr(vaddph))]
1424#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1425pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
1426 unsafe { simd_add(x:a, y:b) }
1427}
1428
1429/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1430/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1431///
1432/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ph)
1433#[inline]
1434#[target_feature(enable = "avx512fp16")]
1435#[cfg_attr(test, assert_instr(vaddph))]
1436#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1437pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1438 unsafe {
1439 let r: __m512h = _mm512_add_ph(a, b);
1440 simd_select_bitmask(m:k, yes:r, no:src)
1441 }
1442}
1443
1444/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1445/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1446///
1447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ph)
1448#[inline]
1449#[target_feature(enable = "avx512fp16")]
1450#[cfg_attr(test, assert_instr(vaddph))]
1451#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1452pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1453 unsafe {
1454 let r: __m512h = _mm512_add_ph(a, b);
1455 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1456 }
1457}
1458
/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        // Validate the rounding immediate at compile time.
        static_assert_rounding!(ROUNDING);
        // `vaddph` with an explicit embedded-rounding control.
        vaddph(a, b, ROUNDING)
    }
}
1480
1481/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1482/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1483/// Rounding is done according to the rounding parameter, which can be one of:
1484///
1485/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1486/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1487/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1488/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1489/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1490///
1491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ph)
1492#[inline]
1493#[target_feature(enable = "avx512fp16")]
1494#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1495#[rustc_legacy_const_generics(4)]
1496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1497pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
1498 src: __m512h,
1499 k: __mmask32,
1500 a: __m512h,
1501 b: __m512h,
1502) -> __m512h {
1503 unsafe {
1504 static_assert_rounding!(ROUNDING);
1505 let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
1506 simd_select_bitmask(m:k, yes:r, no:src)
1507 }
1508}
1509
1510/// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
1511/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1512/// Rounding is done according to the rounding parameter, which can be one of:
1513///
1514/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1515/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1516/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1517/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1518///
1519/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ph)
1520#[inline]
1521#[target_feature(enable = "avx512fp16")]
1522#[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
1523#[rustc_legacy_const_generics(3)]
1524#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1525pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
1526 k: __mmask32,
1527 a: __m512h,
1528 b: __m512h,
1529) -> __m512h {
1530 unsafe {
1531 static_assert_rounding!(ROUNDING);
1532 let r: __m512h = _mm512_add_round_ph::<ROUNDING>(a, b);
1533 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1534 }
1535}
1536
1537/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1538/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1539/// Rounding is done according to the rounding parameter, which can be one of:
1540///
1541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1546///
1547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sh)
1548#[inline]
1549#[target_feature(enable = "avx512fp16")]
1550#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1551#[rustc_legacy_const_generics(2)]
1552#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1553pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1554 static_assert_rounding!(ROUNDING);
1555 _mm_mask_add_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
1556}
1557
1558/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1559/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1560/// writemask k (the element is copied from src when mask bit 0 is not set).
1561/// Rounding is done according to the rounding parameter, which can be one of:
1562///
1563/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1564/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1565/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1566/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1567/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1568///
1569/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sh)
1570#[inline]
1571#[target_feature(enable = "avx512fp16")]
1572#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1573#[rustc_legacy_const_generics(4)]
1574#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1575pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
1576 src: __m128h,
1577 k: __mmask8,
1578 a: __m128h,
1579 b: __m128h,
1580) -> __m128h {
1581 unsafe {
1582 static_assert_rounding!(ROUNDING);
1583 vaddsh(a, b, src, k, ROUNDING)
1584 }
1585}
1586
1587/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1588/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1589/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1590/// Rounding is done according to the rounding parameter, which can be one of:
1591///
1592/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1593/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1594/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1595/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1596/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1597///
1598/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sh)
1599#[inline]
1600#[target_feature(enable = "avx512fp16")]
1601#[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
1602#[rustc_legacy_const_generics(3)]
1603#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1604pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1605 static_assert_rounding!(ROUNDING);
1606 _mm_mask_add_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
1607}
1608
1609/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1610/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1611///
1612/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sh)
1613#[inline]
1614#[target_feature(enable = "avx512fp16")]
1615#[cfg_attr(test, assert_instr(vaddsh))]
1616#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1617pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
1618 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) + _mm_cvtsh_h(b)) }
1619}
1620
1621/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1622/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1623/// writemask k (the element is copied from src when mask bit 0 is not set).
1624///
1625/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sh)
1626#[inline]
1627#[target_feature(enable = "avx512fp16")]
1628#[cfg_attr(test, assert_instr(vaddsh))]
1629#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1630pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1631 unsafe {
1632 let extractsrc: f16 = simd_extract!(src, 0);
1633 let mut add: f16 = extractsrc;
1634 if (k & 0b00000001) != 0 {
1635 let extracta: f16 = simd_extract!(a, 0);
1636 let extractb: f16 = simd_extract!(b, 0);
1637 add = extracta + extractb;
1638 }
1639 simd_insert!(a, 0, add)
1640 }
1641}
1642
1643/// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
1644/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1645/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1646///
1647/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sh)
1648#[inline]
1649#[target_feature(enable = "avx512fp16")]
1650#[cfg_attr(test, assert_instr(vaddsh))]
1651#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1652pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1653 unsafe {
1654 let mut add: f16 = 0.;
1655 if (k & 0b00000001) != 0 {
1656 let extracta: f16 = simd_extract!(a, 0);
1657 let extractb: f16 = simd_extract!(b, 0);
1658 add = extracta + extractb;
1659 }
1660 simd_insert!(a, 0, add)
1661 }
1662}
1663
1664/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1665///
1666/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ph)
1667#[inline]
1668#[target_feature(enable = "avx512fp16,avx512vl")]
1669#[cfg_attr(test, assert_instr(vsubph))]
1670#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1671pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
1672 unsafe { simd_sub(lhs:a, rhs:b) }
1673}
1674
1675/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1676/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1677///
1678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ph)
1679#[inline]
1680#[target_feature(enable = "avx512fp16,avx512vl")]
1681#[cfg_attr(test, assert_instr(vsubph))]
1682#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1683pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1684 unsafe {
1685 let r: __m128h = _mm_sub_ph(a, b);
1686 simd_select_bitmask(m:k, yes:r, no:src)
1687 }
1688}
1689
1690/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1691/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1692///
1693/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ph)
1694#[inline]
1695#[target_feature(enable = "avx512fp16,avx512vl")]
1696#[cfg_attr(test, assert_instr(vsubph))]
1697#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1698pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1699 unsafe {
1700 let r: __m128h = _mm_sub_ph(a, b);
1701 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
1702 }
1703}
1704
1705/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1706///
1707/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ph)
1708#[inline]
1709#[target_feature(enable = "avx512fp16,avx512vl")]
1710#[cfg_attr(test, assert_instr(vsubph))]
1711#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1712pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
1713 unsafe { simd_sub(lhs:a, rhs:b) }
1714}
1715
1716/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1717/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ph)
1720#[inline]
1721#[target_feature(enable = "avx512fp16,avx512vl")]
1722#[cfg_attr(test, assert_instr(vsubph))]
1723#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1724pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1725 unsafe {
1726 let r: __m256h = _mm256_sub_ph(a, b);
1727 simd_select_bitmask(m:k, yes:r, no:src)
1728 }
1729}
1730
1731/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1732/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1733///
1734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ph)
1735#[inline]
1736#[target_feature(enable = "avx512fp16,avx512vl")]
1737#[cfg_attr(test, assert_instr(vsubph))]
1738#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1739pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
1740 unsafe {
1741 let r: __m256h = _mm256_sub_ph(a, b);
1742 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
1743 }
1744}
1745
1746/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1747///
1748/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ph)
1749#[inline]
1750#[target_feature(enable = "avx512fp16")]
1751#[cfg_attr(test, assert_instr(vsubph))]
1752#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1753pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
1754 unsafe { simd_sub(lhs:a, rhs:b) }
1755}
1756
1757/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1758/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1759///
1760/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ph)
1761#[inline]
1762#[target_feature(enable = "avx512fp16")]
1763#[cfg_attr(test, assert_instr(vsubph))]
1764#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1765pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1766 unsafe {
1767 let r: __m512h = _mm512_sub_ph(a, b);
1768 simd_select_bitmask(m:k, yes:r, no:src)
1769 }
1770}
1771
1772/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1773/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ph)
1776#[inline]
1777#[target_feature(enable = "avx512fp16")]
1778#[cfg_attr(test, assert_instr(vsubph))]
1779#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1780pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
1781 unsafe {
1782 let r: __m512h = _mm512_sub_ph(a, b);
1783 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1784 }
1785}
1786
1787/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst.
1788/// Rounding is done according to the rounding parameter, which can be one of:
1789///
1790/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1791/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1792/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1793/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1794/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1795///
1796/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ph)
1797#[inline]
1798#[target_feature(enable = "avx512fp16")]
1799#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1800#[rustc_legacy_const_generics(2)]
1801#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1802pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
1803 unsafe {
1804 static_assert_rounding!(ROUNDING);
1805 vsubph(a, b, ROUNDING)
1806 }
1807}
1808
1809/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1810/// writemask k (elements are copied from src when the corresponding mask bit is not set).
1811/// Rounding is done according to the rounding parameter, which can be one of:
1812///
1813/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1814/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1815/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1816/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1817/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1818///
1819/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ph)
1820#[inline]
1821#[target_feature(enable = "avx512fp16")]
1822#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1823#[rustc_legacy_const_generics(4)]
1824#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1825pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
1826 src: __m512h,
1827 k: __mmask32,
1828 a: __m512h,
1829 b: __m512h,
1830) -> __m512h {
1831 unsafe {
1832 static_assert_rounding!(ROUNDING);
1833 let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
1834 simd_select_bitmask(m:k, yes:r, no:src)
1835 }
1836}
1837
1838/// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using
1839/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1840/// Rounding is done according to the rounding parameter, which can be one of:
1841///
1842/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1843/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1844/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1845/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1846/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1847///
1848/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ph)
1849#[inline]
1850#[target_feature(enable = "avx512fp16")]
1851#[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
1852#[rustc_legacy_const_generics(3)]
1853#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1854pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
1855 k: __mmask32,
1856 a: __m512h,
1857 b: __m512h,
1858) -> __m512h {
1859 unsafe {
1860 static_assert_rounding!(ROUNDING);
1861 let r: __m512h = _mm512_sub_round_ph::<ROUNDING>(a, b);
1862 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
1863 }
1864}
1865
1866/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1867/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1868/// Rounding is done according to the rounding parameter, which can be one of:
1869///
1870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1875///
1876/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sh)
1877#[inline]
1878#[target_feature(enable = "avx512fp16")]
1879#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1880#[rustc_legacy_const_generics(2)]
1881#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1882pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
1883 static_assert_rounding!(ROUNDING);
1884 _mm_mask_sub_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
1885}
1886
1887/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1888/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1889/// writemask k (the element is copied from src when mask bit 0 is not set).
1890/// Rounding is done according to the rounding parameter, which can be one of:
1891///
1892/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1893/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1894/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1895/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1896/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1897///
1898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sh)
1899#[inline]
1900#[target_feature(enable = "avx512fp16")]
1901#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1902#[rustc_legacy_const_generics(4)]
1903#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1904pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
1905 src: __m128h,
1906 k: __mmask8,
1907 a: __m128h,
1908 b: __m128h,
1909) -> __m128h {
1910 unsafe {
1911 static_assert_rounding!(ROUNDING);
1912 vsubsh(a, b, src, k, ROUNDING)
1913 }
1914}
1915
1916/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1917/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1918/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1919/// Rounding is done according to the rounding parameter, which can be one of:
1920///
1921/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1922/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1923/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1924/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1925/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1926///
1927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sh)
1928#[inline]
1929#[target_feature(enable = "avx512fp16")]
1930#[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
1931#[rustc_legacy_const_generics(3)]
1932#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1933pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1934 static_assert_rounding!(ROUNDING);
1935 _mm_mask_sub_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
1936}
1937
1938/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1939/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
1940///
1941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sh)
1942#[inline]
1943#[target_feature(enable = "avx512fp16")]
1944#[cfg_attr(test, assert_instr(vsubsh))]
1945#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1946pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
1947 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) - _mm_cvtsh_h(b)) }
1948}
1949
1950/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1951/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1952/// writemask k (the element is copied from src when mask bit 0 is not set).
1953///
1954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sh)
1955#[inline]
1956#[target_feature(enable = "avx512fp16")]
1957#[cfg_attr(test, assert_instr(vsubsh))]
1958#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1959pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1960 unsafe {
1961 let extractsrc: f16 = simd_extract!(src, 0);
1962 let mut add: f16 = extractsrc;
1963 if (k & 0b00000001) != 0 {
1964 let extracta: f16 = simd_extract!(a, 0);
1965 let extractb: f16 = simd_extract!(b, 0);
1966 add = extracta - extractb;
1967 }
1968 simd_insert!(a, 0, add)
1969 }
1970}
1971
1972/// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the
1973/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
1974/// zeromask k (the element is zeroed out when mask bit 0 is not set).
1975///
1976/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sh)
1977#[inline]
1978#[target_feature(enable = "avx512fp16")]
1979#[cfg_attr(test, assert_instr(vsubsh))]
1980#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
1981pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
1982 unsafe {
1983 let mut add: f16 = 0.;
1984 if (k & 0b00000001) != 0 {
1985 let extracta: f16 = simd_extract!(a, 0);
1986 let extractb: f16 = simd_extract!(b, 0);
1987 add = extracta - extractb;
1988 }
1989 simd_insert!(a, 0, add)
1990 }
1991}
1992
1993/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
1994///
1995/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ph)
1996#[inline]
1997#[target_feature(enable = "avx512fp16,avx512vl")]
1998#[cfg_attr(test, assert_instr(vmulph))]
1999#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2000pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
2001 unsafe { simd_mul(x:a, y:b) }
2002}
2003
2004/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2005/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2006///
2007/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ph)
2008#[inline]
2009#[target_feature(enable = "avx512fp16,avx512vl")]
2010#[cfg_attr(test, assert_instr(vmulph))]
2011#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2012pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2013 unsafe {
2014 let r: __m128h = _mm_mul_ph(a, b);
2015 simd_select_bitmask(m:k, yes:r, no:src)
2016 }
2017}
2018
2019/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2020/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2021///
2022/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ph)
2023#[inline]
2024#[target_feature(enable = "avx512fp16,avx512vl")]
2025#[cfg_attr(test, assert_instr(vmulph))]
2026#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2027pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2028 unsafe {
2029 let r: __m128h = _mm_mul_ph(a, b);
2030 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
2031 }
2032}
2033
2034/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2035///
2036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ph)
2037#[inline]
2038#[target_feature(enable = "avx512fp16,avx512vl")]
2039#[cfg_attr(test, assert_instr(vmulph))]
2040#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2041pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
2042 unsafe { simd_mul(x:a, y:b) }
2043}
2044
2045/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2046/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2047///
2048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ph)
2049#[inline]
2050#[target_feature(enable = "avx512fp16,avx512vl")]
2051#[cfg_attr(test, assert_instr(vmulph))]
2052#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2053pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2054 unsafe {
2055 let r: __m256h = _mm256_mul_ph(a, b);
2056 simd_select_bitmask(m:k, yes:r, no:src)
2057 }
2058}
2059
2060/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2061/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2062///
2063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ph)
2064#[inline]
2065#[target_feature(enable = "avx512fp16,avx512vl")]
2066#[cfg_attr(test, assert_instr(vmulph))]
2067#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2068pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2069 unsafe {
2070 let r: __m256h = _mm256_mul_ph(a, b);
2071 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
2072 }
2073}
2074
2075/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2076///
2077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ph)
2078#[inline]
2079#[target_feature(enable = "avx512fp16")]
2080#[cfg_attr(test, assert_instr(vmulph))]
2081#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2082pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
2083 unsafe { simd_mul(x:a, y:b) }
2084}
2085
2086/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2087/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2088///
2089/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ph)
2090#[inline]
2091#[target_feature(enable = "avx512fp16")]
2092#[cfg_attr(test, assert_instr(vmulph))]
2093#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2094pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2095 unsafe {
2096 let r: __m512h = _mm512_mul_ph(a, b);
2097 simd_select_bitmask(m:k, yes:r, no:src)
2098 }
2099}
2100
2101/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2102/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2103///
2104/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ph)
2105#[inline]
2106#[target_feature(enable = "avx512fp16")]
2107#[cfg_attr(test, assert_instr(vmulph))]
2108#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2109pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2110 unsafe {
2111 let r: __m512h = _mm512_mul_ph(a, b);
2112 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2113 }
2114}
2115
2116/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst.
2117/// Rounding is done according to the rounding parameter, which can be one of:
2118///
2119/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2120/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2121/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2122/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2123/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2124///
2125/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ph)
2126#[inline]
2127#[target_feature(enable = "avx512fp16")]
2128#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2129#[rustc_legacy_const_generics(2)]
2130#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2131pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2132 unsafe {
2133 static_assert_rounding!(ROUNDING);
2134 vmulph(a, b, ROUNDING)
2135 }
2136}
2137
2138/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2139/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2140/// Rounding is done according to the rounding parameter, which can be one of:
2141///
2142/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2143/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2144/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2145/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2147///
2148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ph)
2149#[inline]
2150#[target_feature(enable = "avx512fp16")]
2151#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2152#[rustc_legacy_const_generics(4)]
2153#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2154pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
2155 src: __m512h,
2156 k: __mmask32,
2157 a: __m512h,
2158 b: __m512h,
2159) -> __m512h {
2160 unsafe {
2161 static_assert_rounding!(ROUNDING);
2162 let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
2163 simd_select_bitmask(m:k, yes:r, no:src)
2164 }
2165}
2166
2167/// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using
2168/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2169/// Rounding is done according to the rounding parameter, which can be one of:
2170///
2171/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2172/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2173/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2174/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2176///
2177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ph)
2178#[inline]
2179#[target_feature(enable = "avx512fp16")]
2180#[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
2181#[rustc_legacy_const_generics(3)]
2182#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2183pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
2184 k: __mmask32,
2185 a: __m512h,
2186 b: __m512h,
2187) -> __m512h {
2188 unsafe {
2189 static_assert_rounding!(ROUNDING);
2190 let r: __m512h = _mm512_mul_round_ph::<ROUNDING>(a, b);
2191 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2192 }
2193}
2194
2195/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2196/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2197/// Rounding is done according to the rounding parameter, which can be one of:
2198///
2199/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2200/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2201/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2202/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2203/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2204///
2205/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sh)
2206#[inline]
2207#[target_feature(enable = "avx512fp16")]
2208#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2209#[rustc_legacy_const_generics(2)]
2210#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2211pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2212 static_assert_rounding!(ROUNDING);
2213 _mm_mask_mul_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
2214}
2215
2216/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2217/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2218/// writemask k (the element is copied from src when mask bit 0 is not set).
2219/// Rounding is done according to the rounding parameter, which can be one of:
2220///
2221/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2222/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2223/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2224/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2225/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2226///
2227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sh)
2228#[inline]
2229#[target_feature(enable = "avx512fp16")]
2230#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2231#[rustc_legacy_const_generics(4)]
2232#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2233pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
2234 src: __m128h,
2235 k: __mmask8,
2236 a: __m128h,
2237 b: __m128h,
2238) -> __m128h {
2239 unsafe {
2240 static_assert_rounding!(ROUNDING);
2241 vmulsh(a, b, src, k, ROUNDING)
2242 }
2243}
2244
2245/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2246/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2247/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2248/// Rounding is done according to the rounding parameter, which can be one of:
2249///
2250/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2251/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2252/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2253/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2254/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2255///
2256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sh)
2257#[inline]
2258#[target_feature(enable = "avx512fp16")]
2259#[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
2260#[rustc_legacy_const_generics(3)]
2261#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2262pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2263 static_assert_rounding!(ROUNDING);
2264 _mm_mask_mul_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
2265}
2266
2267/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2268/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2269///
2270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sh)
2271#[inline]
2272#[target_feature(enable = "avx512fp16")]
2273#[cfg_attr(test, assert_instr(vmulsh))]
2274#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2275pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
2276 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) * _mm_cvtsh_h(b)) }
2277}
2278
2279/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2280/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2281/// writemask k (the element is copied from src when mask bit 0 is not set).
2282///
2283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sh)
2284#[inline]
2285#[target_feature(enable = "avx512fp16")]
2286#[cfg_attr(test, assert_instr(vmulsh))]
2287#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2288pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2289 unsafe {
2290 let extractsrc: f16 = simd_extract!(src, 0);
2291 let mut add: f16 = extractsrc;
2292 if (k & 0b00000001) != 0 {
2293 let extracta: f16 = simd_extract!(a, 0);
2294 let extractb: f16 = simd_extract!(b, 0);
2295 add = extracta * extractb;
2296 }
2297 simd_insert!(a, 0, add)
2298 }
2299}
2300
2301/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the
2302/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2303/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2304///
2305/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sh)
2306#[inline]
2307#[target_feature(enable = "avx512fp16")]
2308#[cfg_attr(test, assert_instr(vmulsh))]
2309#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2310pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2311 unsafe {
2312 let mut add: f16 = 0.;
2313 if (k & 0b00000001) != 0 {
2314 let extracta: f16 = simd_extract!(a, 0);
2315 let extractb: f16 = simd_extract!(b, 0);
2316 add = extracta * extractb;
2317 }
2318 simd_insert!(a, 0, add)
2319 }
2320}
2321
2322/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2323///
2324/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ph)
2325#[inline]
2326#[target_feature(enable = "avx512fp16,avx512vl")]
2327#[cfg_attr(test, assert_instr(vdivph))]
2328#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2329pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
2330 unsafe { simd_div(lhs:a, rhs:b) }
2331}
2332
2333/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2334/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2335///
2336/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ph)
2337#[inline]
2338#[target_feature(enable = "avx512fp16,avx512vl")]
2339#[cfg_attr(test, assert_instr(vdivph))]
2340#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2341pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2342 unsafe {
2343 let r: __m128h = _mm_div_ph(a, b);
2344 simd_select_bitmask(m:k, yes:r, no:src)
2345 }
2346}
2347
2348/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2349/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2350///
2351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ph)
2352#[inline]
2353#[target_feature(enable = "avx512fp16,avx512vl")]
2354#[cfg_attr(test, assert_instr(vdivph))]
2355#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2356pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2357 unsafe {
2358 let r: __m128h = _mm_div_ph(a, b);
2359 simd_select_bitmask(m:k, yes:r, no:_mm_setzero_ph())
2360 }
2361}
2362
2363/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2364///
2365/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ph)
2366#[inline]
2367#[target_feature(enable = "avx512fp16,avx512vl")]
2368#[cfg_attr(test, assert_instr(vdivph))]
2369#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2370pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
2371 unsafe { simd_div(lhs:a, rhs:b) }
2372}
2373
2374/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2375/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2376///
2377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ph)
2378#[inline]
2379#[target_feature(enable = "avx512fp16,avx512vl")]
2380#[cfg_attr(test, assert_instr(vdivph))]
2381#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2382pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2383 unsafe {
2384 let r: __m256h = _mm256_div_ph(a, b);
2385 simd_select_bitmask(m:k, yes:r, no:src)
2386 }
2387}
2388
2389/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2390/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2391///
2392/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ph)
2393#[inline]
2394#[target_feature(enable = "avx512fp16,avx512vl")]
2395#[cfg_attr(test, assert_instr(vdivph))]
2396#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2397pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
2398 unsafe {
2399 let r: __m256h = _mm256_div_ph(a, b);
2400 simd_select_bitmask(m:k, yes:r, no:_mm256_setzero_ph())
2401 }
2402}
2403
2404/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2405///
2406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ph)
2407#[inline]
2408#[target_feature(enable = "avx512fp16")]
2409#[cfg_attr(test, assert_instr(vdivph))]
2410#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2411pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
2412 unsafe { simd_div(lhs:a, rhs:b) }
2413}
2414
2415/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2416/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2417///
2418/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ph)
2419#[inline]
2420#[target_feature(enable = "avx512fp16")]
2421#[cfg_attr(test, assert_instr(vdivph))]
2422#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2423pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2424 unsafe {
2425 let r: __m512h = _mm512_div_ph(a, b);
2426 simd_select_bitmask(m:k, yes:r, no:src)
2427 }
2428}
2429
2430/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2431/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2432///
2433/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ph)
2434#[inline]
2435#[target_feature(enable = "avx512fp16")]
2436#[cfg_attr(test, assert_instr(vdivph))]
2437#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2438pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
2439 unsafe {
2440 let r: __m512h = _mm512_div_ph(a, b);
2441 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2442 }
2443}
2444
2445/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst.
2446/// Rounding is done according to the rounding parameter, which can be one of:
2447///
2448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2453///
2454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ph)
2455#[inline]
2456#[target_feature(enable = "avx512fp16")]
2457#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2458#[rustc_legacy_const_generics(2)]
2459#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2460pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2461 unsafe {
2462 static_assert_rounding!(ROUNDING);
2463 vdivph(a, b, ROUNDING)
2464 }
2465}
2466
2467/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2468/// writemask k (elements are copied from src when the corresponding mask bit is not set).
2469/// Rounding is done according to the rounding parameter, which can be one of:
2470///
2471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2476///
2477/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ph)
2478#[inline]
2479#[target_feature(enable = "avx512fp16")]
2480#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2481#[rustc_legacy_const_generics(4)]
2482#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2483pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
2484 src: __m512h,
2485 k: __mmask32,
2486 a: __m512h,
2487 b: __m512h,
2488) -> __m512h {
2489 unsafe {
2490 static_assert_rounding!(ROUNDING);
2491 let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
2492 simd_select_bitmask(m:k, yes:r, no:src)
2493 }
2494}
2495
2496/// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using
2497/// zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2498/// Rounding is done according to the rounding parameter, which can be one of:
2499///
2500/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2501/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2502/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2503/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2504/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2505///
2506/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ph)
2507#[inline]
2508#[target_feature(enable = "avx512fp16")]
2509#[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
2510#[rustc_legacy_const_generics(3)]
2511#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2512pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
2513 k: __mmask32,
2514 a: __m512h,
2515 b: __m512h,
2516) -> __m512h {
2517 unsafe {
2518 static_assert_rounding!(ROUNDING);
2519 let r: __m512h = _mm512_div_round_ph::<ROUNDING>(a, b);
2520 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ph())
2521 }
2522}
2523
2524/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2525/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2526/// Rounding is done according to the rounding parameter, which can be one of:
2527///
2528/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2529/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2530/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2531/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2532/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2533///
2534/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sh)
2535#[inline]
2536#[target_feature(enable = "avx512fp16")]
2537#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2538#[rustc_legacy_const_generics(2)]
2539#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2540pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2541 static_assert_rounding!(ROUNDING);
2542 _mm_mask_div_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
2543}
2544
2545/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2546/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2547/// writemask k (the element is copied from src when mask bit 0 is not set).
2548/// Rounding is done according to the rounding parameter, which can be one of:
2549///
2550/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2551/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2552/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2553/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2554/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2555///
2556/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sh)
2557#[inline]
2558#[target_feature(enable = "avx512fp16")]
2559#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2560#[rustc_legacy_const_generics(4)]
2561#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2562pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
2563 src: __m128h,
2564 k: __mmask8,
2565 a: __m128h,
2566 b: __m128h,
2567) -> __m128h {
2568 unsafe {
2569 static_assert_rounding!(ROUNDING);
2570 vdivsh(a, b, src, k, ROUNDING)
2571 }
2572}
2573
2574/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2575/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2576/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2577/// Rounding is done according to the rounding parameter, which can be one of:
2578///
2579/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2580/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2581/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2582/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2584///
2585/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sh)
2586#[inline]
2587#[target_feature(enable = "avx512fp16")]
2588#[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
2589#[rustc_legacy_const_generics(3)]
2590#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2591pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2592 static_assert_rounding!(ROUNDING);
2593 _mm_mask_div_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
2594}
2595
2596/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2597/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
2598///
2599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sh)
2600#[inline]
2601#[target_feature(enable = "avx512fp16")]
2602#[cfg_attr(test, assert_instr(vdivsh))]
2603#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2604pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
2605 unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) / _mm_cvtsh_h(b)) }
2606}
2607
2608/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2609/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2610/// writemask k (the element is copied from src when mask bit 0 is not set).
2611///
2612/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sh)
2613#[inline]
2614#[target_feature(enable = "avx512fp16")]
2615#[cfg_attr(test, assert_instr(vdivsh))]
2616#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2617pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2618 unsafe {
2619 let extractsrc: f16 = simd_extract!(src, 0);
2620 let mut add: f16 = extractsrc;
2621 if (k & 0b00000001) != 0 {
2622 let extracta: f16 = simd_extract!(a, 0);
2623 let extractb: f16 = simd_extract!(b, 0);
2624 add = extracta / extractb;
2625 }
2626 simd_insert!(a, 0, add)
2627 }
2628}
2629
2630/// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the
2631/// lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst using
2632/// zeromask k (the element is zeroed out when mask bit 0 is not set).
2633///
2634/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sh)
2635#[inline]
2636#[target_feature(enable = "avx512fp16")]
2637#[cfg_attr(test, assert_instr(vdivsh))]
2638#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2639pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2640 unsafe {
2641 let mut add: f16 = 0.;
2642 if (k & 0b00000001) != 0 {
2643 let extracta: f16 = simd_extract!(a, 0);
2644 let extractb: f16 = simd_extract!(b, 0);
2645 add = extracta / extractb;
2646 }
2647 simd_insert!(a, 0, add)
2648 }
2649}
2650
2651/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2652/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2653/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2654///
2655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pch)
2656#[inline]
2657#[target_feature(enable = "avx512fp16,avx512vl")]
2658#[cfg_attr(test, assert_instr(vfmulcph))]
2659#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2660pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
2661 _mm_mask_mul_pch(src:_mm_undefined_ph(), k:0xff, a, b)
2662}
2663
2664/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2665/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2666/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2667///
2668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pch)
2669#[inline]
2670#[target_feature(enable = "avx512fp16,avx512vl")]
2671#[cfg_attr(test, assert_instr(vfmulcph))]
2672#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2673pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2674 unsafe { transmute(src:vfmulcph_128(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
2675}
2676
2677/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2678/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2679/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2680///
2681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pch)
2682#[inline]
2683#[target_feature(enable = "avx512fp16,avx512vl")]
2684#[cfg_attr(test, assert_instr(vfmulcph))]
2685#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2686pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2687 _mm_mask_mul_pch(src:_mm_setzero_ph(), k, a, b)
2688}
2689
2690/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2691/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2692/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2693///
2694/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pch)
2695#[inline]
2696#[target_feature(enable = "avx512fp16,avx512vl")]
2697#[cfg_attr(test, assert_instr(vfmulcph))]
2698#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2699pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
2700 _mm256_mask_mul_pch(src:_mm256_undefined_ph(), k:0xff, a, b)
2701}
2702
2703/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2704/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2705/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2706///
2707/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pch)
2708#[inline]
2709#[target_feature(enable = "avx512fp16,avx512vl")]
2710#[cfg_attr(test, assert_instr(vfmulcph))]
2711#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2712pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2713 unsafe { transmute(src:vfmulcph_256(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
2714}
2715
2716/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2717/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2718/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2719///
2720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pch)
2721#[inline]
2722#[target_feature(enable = "avx512fp16,avx512vl")]
2723#[cfg_attr(test, assert_instr(vfmulcph))]
2724#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2725pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
2726 _mm256_mask_mul_pch(src:_mm256_setzero_ph(), k, a, b)
2727}
2728
2729/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2730/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2731/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2732///
2733/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pch)
2734#[inline]
2735#[target_feature(enable = "avx512fp16")]
2736#[cfg_attr(test, assert_instr(vfmulcph))]
2737#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2738pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
2739 _mm512_mask_mul_pch(src:_mm512_undefined_ph(), k:0xffff, a, b)
2740}
2741
2742/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2743/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2744/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2745///
2746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    // Forward to the rounding variant using the current MXCSR rounding mode.
    _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
2754
2755/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2756/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2757/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2758///
2759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pch)
2760#[inline]
2761#[target_feature(enable = "avx512fp16")]
2762#[cfg_attr(test, assert_instr(vfmulcph))]
2763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2764pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
2765 _mm512_mask_mul_pch(src:_mm512_setzero_ph(), k, a, b)
2766}
2767
2768/// Multiply the packed complex numbers in a and b, and store the results in dst. Each complex number is
2769/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2770/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2771///
2772/// Rounding is done according to the rounding parameter, which can be one of:
2773///
2774/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2775/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2776/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2777/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2778/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2779///
2780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pch)
2781#[inline]
2782#[target_feature(enable = "avx512fp16")]
2783#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2784#[rustc_legacy_const_generics(2)]
2785#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2786pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
2787 static_assert_rounding!(ROUNDING);
2788 _mm512_mask_mul_round_pch::<ROUNDING>(src:_mm512_undefined_ph(), k:0xffff, a, b)
2789}
2790
2791/// Multiply the packed complex numbers in a and b, and store the results in dst using writemask k (the element
2792/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
2793/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2794///
2795/// Rounding is done according to the rounding parameter, which can be one of:
2796///
2797/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2798/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2799/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2800/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2801/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2802///
2803/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pch)
2804#[inline]
2805#[target_feature(enable = "avx512fp16")]
2806#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2807#[rustc_legacy_const_generics(4)]
2808#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2809pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
2810 src: __m512h,
2811 k: __mmask16,
2812 a: __m512h,
2813 b: __m512h,
2814) -> __m512h {
2815 unsafe {
2816 static_assert_rounding!(ROUNDING);
2817 transmute(src:vfmulcph_512(
2818 a:transmute(a),
2819 b:transmute(b),
2820 src:transmute(src),
2821 k,
2822 ROUNDING,
2823 ))
2824 }
2825}
2826
2827/// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element
2828/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
2829/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2830///
2831/// Rounding is done according to the rounding parameter, which can be one of:
2832///
2833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2838///
2839/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pch)
2840#[inline]
2841#[target_feature(enable = "avx512fp16")]
2842#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
2843#[rustc_legacy_const_generics(3)]
2844#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2845pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
2846 k: __mmask16,
2847 a: __m512h,
2848 b: __m512h,
2849) -> __m512h {
2850 static_assert_rounding!(ROUNDING);
2851 _mm512_mask_mul_round_pch::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
2852}
2853
2854/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2855/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2856/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2857/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2858///
2859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sch)
2860#[inline]
2861#[target_feature(enable = "avx512fp16")]
2862#[cfg_attr(test, assert_instr(vfmulcsh))]
2863#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2864pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
2865 _mm_mask_mul_sch(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
2866}
2867
2868/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2869/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2870/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent
2871/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2872///
2873/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Forward to the rounding variant using the current MXCSR rounding mode.
    _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
2881
2882/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2883/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2884/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2885/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2886///
2887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sch)
2888#[inline]
2889#[target_feature(enable = "avx512fp16")]
2890#[cfg_attr(test, assert_instr(vfmulcsh))]
2891#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2892pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
2893 _mm_mask_mul_sch(src:f16x8::ZERO.as_m128h(), k, a, b)
2894}
2895
2896/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst,
2897/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
2898/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2899/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2900///
2901/// Rounding is done according to the rounding parameter, which can be one of:
2902///
2903/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2904/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2905/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2906/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2907/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2908///
2909/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sch)
2910#[inline]
2911#[target_feature(enable = "avx512fp16")]
2912#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2913#[rustc_legacy_const_generics(2)]
2914#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2915pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
2916 static_assert_rounding!(ROUNDING);
2917 _mm_mask_mul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
2918}
2919
2920/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2921/// writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 6 packed
2922/// elements from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2923/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2924///
2925/// Rounding is done according to the rounding parameter, which can be one of:
2926///
2927/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2928/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2929/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2930/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2931/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2932///
2933/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sch)
2934#[inline]
2935#[target_feature(enable = "avx512fp16")]
2936#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2937#[rustc_legacy_const_generics(4)]
2938#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2939pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
2940 src: __m128h,
2941 k: __mmask8,
2942 a: __m128h,
2943 b: __m128h,
2944) -> __m128h {
2945 unsafe {
2946 static_assert_rounding!(ROUNDING);
2947 transmute(src:vfmulcsh(
2948 a:transmute(a),
2949 b:transmute(b),
2950 src:transmute(src),
2951 k,
2952 ROUNDING,
2953 ))
2954 }
2955}
2956
2957/// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using
2958/// zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 6 packed elements
2959/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision
2960/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2961///
2962/// Rounding is done according to the rounding parameter, which can be one of:
2963///
2964/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2965/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2966/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2967/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2968/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2969///
2970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sch)
2971#[inline]
2972#[target_feature(enable = "avx512fp16")]
2973#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
2974#[rustc_legacy_const_generics(3)]
2975#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
2976pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
2977 k: __mmask8,
2978 a: __m128h,
2979 b: __m128h,
2980) -> __m128h {
2981 static_assert_rounding!(ROUNDING);
2982 _mm_mask_mul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
2983}
2984
2985/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
2986/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
2987/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
2988///
2989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm_mul_pch(a, b)
}
2997
2998/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
2999/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent
3000/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3001///
3002/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm_mask_mul_pch(src, k, a, b)
}
3010
3011/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3012/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3013/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3014///
3015/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm_maskz_mul_pch(k, a, b)
}
3023
3024/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is
3025/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3026/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3027///
3028/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm256_mul_pch(a, b)
}
3036
3037/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3038/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3039/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3040///
3041/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm256_mask_mul_pch(src, k, a, b)
}
3049
3050/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3051/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3052/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3053///
3054/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm256_maskz_mul_pch(k, a, b)
}
3062
3063/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3064/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3065///
3066/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm512_mul_pch(a, b)
}
3074
3075/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3076/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3077/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3078///
3079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm512_mask_mul_pch(src, k, a, b)
}
3087
3088/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3089/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3090/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3091///
3092/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm512_maskz_mul_pch(k, a, b)
}
3100
3101/// Multiply packed complex numbers in a and b, and store the results in dst. Each complex number is composed
3102/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3103/// Rounding is done according to the rounding parameter, which can be one of:
3104///
3105/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3106/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3107/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3108/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3109/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3110///
3111/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmul_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    static_assert_rounding!(ROUNDING);
    _mm512_mul_round_pch::<ROUNDING>(a, b)
}
3121
3122/// Multiply packed complex numbers in a and b, and store the results in dst using writemask k (the element
3123/// is copied from src when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3124/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3125/// Rounding is done according to the rounding parameter, which can be one of:
3126///
3127/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3128/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3129/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3130/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3131/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3132///
3133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmul_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
    src: __m512h,
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_mul_round_pch::<ROUNDING>(src, k, a, b)
}
3148
3149/// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element
3150/// is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent half-precision
3151/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3152/// Rounding is done according to the rounding parameter, which can be one of:
3153///
3154/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3155/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3156/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3157/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3159///
3160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmul_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    static_assert_rounding!(ROUNDING);
    _mm512_maskz_mul_round_pch::<ROUNDING>(k, a, b)
}
3174
3175/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is
3176/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
3177/// number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3178///
3179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm_mul_sch(a, b)
}
3187
3188/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3189/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3190/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3191///
3192/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm_mask_mul_sch(src, k, a, b)
}
3200
3201/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3202/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3203/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3204///
3205/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    _mm_maskz_mul_sch(k, a, b)
}
3213
3214/// Multiply the lower complex numbers in a and b, and store the results in dst. Each complex number is composed
3215/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3216///
3217/// Rounding is done according to the rounding parameter, which can be one of:
3218///
3219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3224///
3225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmul_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    static_assert_rounding!(ROUNDING);
    _mm_mul_round_sch::<ROUNDING>(a, b)
}
3235
3236/// Multiply the lower complex numbers in a and b, and store the results in dst using writemask k (the element
3237/// is copied from src when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3238/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3239///
3240/// Rounding is done according to the rounding parameter, which can be one of:
3241///
3242/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3243/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3244/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3245/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3246/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3247///
3248/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmul_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    // `fmul` is an alias of `mul`; forward to the canonical intrinsic.
    static_assert_rounding!(ROUNDING);
    _mm_mask_mul_round_sch::<ROUNDING>(src, k, a, b)
}
3263
3264/// Multiply the lower complex numbers in a and b, and store the results in dst using zeromask k (the element
3265/// is zeroed out when mask bit 0 is not set). Each complex number is composed of two adjacent half-precision
3266/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
3267///
3268/// Rounding is done according to the rounding parameter, which can be one of:
3269///
3270/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3271/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3272/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3273/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3274/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3275///
3276/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmul_round_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_rounding!(ROUNDING);
    // `fmul` is an alias of `mul`; delegate to the identically-behaving intrinsic.
    _mm_maskz_mul_round_sch::<ROUNDING>(k, a, b)
}
3290
3291/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3292/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3293/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3294/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3295///
3296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_pch)
3297#[inline]
3298#[target_feature(enable = "avx512fp16,avx512vl")]
3299#[cfg_attr(test, assert_instr(vfcmulcph))]
3300#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3301pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
3302 _mm_mask_cmul_pch(src:_mm_undefined_ph(), k:0xff, a, b)
3303}
3304
3305/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3306/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3307/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3308/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3309///
3310/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_pch)
3311#[inline]
3312#[target_feature(enable = "avx512fp16,avx512vl")]
3313#[cfg_attr(test, assert_instr(vfcmulcph))]
3314#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3315pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3316 unsafe { transmute(src:vfcmulcph_128(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
3317}
3318
3319/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3320/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3321/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3322/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3323///
3324/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_pch)
3325#[inline]
3326#[target_feature(enable = "avx512fp16,avx512vl")]
3327#[cfg_attr(test, assert_instr(vfcmulcph))]
3328#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3329pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3330 _mm_mask_cmul_pch(src:_mm_setzero_ph(), k, a, b)
3331}
3332
3333/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3334/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3335/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3336/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3337///
3338/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmul_pch)
3339#[inline]
3340#[target_feature(enable = "avx512fp16,avx512vl")]
3341#[cfg_attr(test, assert_instr(vfcmulcph))]
3342#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3343pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
3344 _mm256_mask_cmul_pch(src:_mm256_undefined_ph(), k:0xff, a, b)
3345}
3346
3347/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3348/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3349/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3350/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3351///
3352/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmul_pch)
3353#[inline]
3354#[target_feature(enable = "avx512fp16,avx512vl")]
3355#[cfg_attr(test, assert_instr(vfcmulcph))]
3356#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3357pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3358 unsafe { transmute(src:vfcmulcph_256(a:transmute(a), b:transmute(b), src:transmute(src), k)) }
3359}
3360
3361/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3362/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3363/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3364/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3365///
3366/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cmul_pch)
3367#[inline]
3368#[target_feature(enable = "avx512fp16,avx512vl")]
3369#[cfg_attr(test, assert_instr(vfcmulcph))]
3370#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3371pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
3372 _mm256_mask_cmul_pch(src:_mm256_setzero_ph(), k, a, b)
3373}
3374
3375/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3376/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3377/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3378/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3379///
3380/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_pch)
3381#[inline]
3382#[target_feature(enable = "avx512fp16")]
3383#[cfg_attr(test, assert_instr(vfcmulcph))]
3384#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3385pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
3386 _mm512_mask_cmul_pch(src:_mm512_undefined_ph(), k:0xffff, a, b)
3387}
3388
3389/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3390/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3391/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3392/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3393///
3394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    // Non-rounding form: delegate to the rounding variant with MXCSR's current mode.
    _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
3402
3403/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3404/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3405/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3406/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3407///
3408/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_pch)
3409#[inline]
3410#[target_feature(enable = "avx512fp16")]
3411#[cfg_attr(test, assert_instr(vfcmulcph))]
3412#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3413pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
3414 _mm512_mask_cmul_pch(src:_mm512_setzero_ph(), k, a, b)
3415}
3416
3417/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3418/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3419/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3420/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3421///
3422/// Rounding is done according to the rounding parameter, which can be one of:
3423///
3424/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3425/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3426/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3427/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3428/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3429///
3430/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmul_round_pch)
3431#[inline]
3432#[target_feature(enable = "avx512fp16")]
3433#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3434#[rustc_legacy_const_generics(2)]
3435#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3436pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
3437 static_assert_rounding!(ROUNDING);
3438 _mm512_mask_cmul_round_pch::<ROUNDING>(src:_mm512_undefined_ph(), k:0xffff, a, b)
3439}
3440
3441/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3442/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3443/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3444/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3445///
3446/// Rounding is done according to the rounding parameter, which can be one of:
3447///
3448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3453///
3454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmul_round_pch)
3455#[inline]
3456#[target_feature(enable = "avx512fp16")]
3457#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3458#[rustc_legacy_const_generics(4)]
3459#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3460pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
3461 src: __m512h,
3462 k: __mmask16,
3463 a: __m512h,
3464 b: __m512h,
3465) -> __m512h {
3466 unsafe {
3467 static_assert_rounding!(ROUNDING);
3468 transmute(src:vfcmulcph_512(
3469 a:transmute(a),
3470 b:transmute(b),
3471 src:transmute(src),
3472 k,
3473 ROUNDING,
3474 ))
3475 }
3476}
3477
3478/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3479/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3480/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3481/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3482///
3483/// Rounding is done according to the rounding parameter, which can be one of:
3484///
3485/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3486/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3487/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3488/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3489/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3490///
3491/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cmul_round_pch)
3492#[inline]
3493#[target_feature(enable = "avx512fp16")]
3494#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3495#[rustc_legacy_const_generics(3)]
3496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3497pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
3498 k: __mmask16,
3499 a: __m512h,
3500 b: __m512h,
3501) -> __m512h {
3502 static_assert_rounding!(ROUNDING);
3503 _mm512_mask_cmul_round_pch::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
3504}
3505
3506/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3507/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3509///
3510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_sch)
3511#[inline]
3512#[target_feature(enable = "avx512fp16")]
3513#[cfg_attr(test, assert_instr(vfcmulcsh))]
3514#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3515pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
3516 _mm_mask_cmul_sch(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
3517}
3518
3519/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3520/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3521/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3523///
3524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Non-rounding form: delegate to the rounding variant with MXCSR's current mode.
    _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
3532
3533/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3534/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3535/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3537///
3538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_sch)
3539#[inline]
3540#[target_feature(enable = "avx512fp16")]
3541#[cfg_attr(test, assert_instr(vfcmulcsh))]
3542#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3543pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3544 _mm_mask_cmul_sch(src:f16x8::ZERO.as_m128h(), k, a, b)
3545}
3546
3547/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3548/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3550///
3551/// Rounding is done according to the rounding parameter, which can be one of:
3552///
3553/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3554/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3555/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3556/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3557/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3558///
3559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmul_round_sch)
3560#[inline]
3561#[target_feature(enable = "avx512fp16")]
3562#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3563#[rustc_legacy_const_generics(2)]
3564#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3565pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3566 static_assert_rounding!(ROUNDING);
3567 _mm_mask_cmul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
3568}
3569
3570/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3571/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3572/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3573/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3574///
3575/// Rounding is done according to the rounding parameter, which can be one of:
3576///
3577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3582///
3583/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmul_round_sch)
3584#[inline]
3585#[target_feature(enable = "avx512fp16")]
3586#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3587#[rustc_legacy_const_generics(4)]
3588#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3589pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
3590 src: __m128h,
3591 k: __mmask8,
3592 a: __m128h,
3593 b: __m128h,
3594) -> __m128h {
3595 unsafe {
3596 static_assert_rounding!(ROUNDING);
3597 transmute(src:vfcmulcsh(
3598 a:transmute(a),
3599 b:transmute(b),
3600 src:transmute(src),
3601 k,
3602 ROUNDING,
3603 ))
3604 }
3605}
3606
3607/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3608/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3609/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3610/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3611///
3612/// Rounding is done according to the rounding parameter, which can be one of:
3613///
3614/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3615/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3616/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3617/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3618/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3619///
3620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cmul_round_sch)
3621#[inline]
3622#[target_feature(enable = "avx512fp16")]
3623#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3624#[rustc_legacy_const_generics(3)]
3625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3626pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
3627 k: __mmask8,
3628 a: __m128h,
3629 b: __m128h,
3630) -> __m128h {
3631 static_assert_rounding!(ROUNDING);
3632 _mm_mask_cmul_round_sch::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
3633}
3634
3635/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3636/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3637/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3638/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3639///
3640/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm_cmul_pch(a, b)
}
3648
3649/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3650/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3651/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3652/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3653///
3654/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm_mask_cmul_pch(src, k, a, b)
}
3662
3663/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3664/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3665/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3666/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3667///
3668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm_maskz_cmul_pch(k, a, b)
}
3676
3677/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3678/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3679/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3680/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3681///
3682/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm256_cmul_pch(a, b)
}
3690
3691/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3692/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3693/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3694/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3695///
3696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm256_mask_cmul_pch(src, k, a, b)
}
3704
3705/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3706/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3707/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3708/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3709///
3710/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm256_maskz_cmul_pch(k, a, b)
}
3718
3719/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3720/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3721/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3722/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3723///
3724/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm512_cmul_pch(a, b)
}
3732
3733/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3734/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3735/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3736/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3737///
3738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm512_mask_cmul_pch(src, k, a, b)
}
3746
3747/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3748/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3749/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3750/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3751///
3752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm512_maskz_cmul_pch(k, a, b)
}
3760
3761/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3762/// store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3764///
3765/// Rounding is done according to the rounding parameter, which can be one of:
3766///
3767/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3768/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3769/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3770/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3771/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3772///
3773/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmul_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
    static_assert_rounding!(ROUNDING);
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm512_cmul_round_pch::<ROUNDING>(a, b)
}
3783
3784/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3785/// store the results in dst using writemask k (the element is copied from src when corresponding mask bit is not set).
3786/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3787/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3788///
3789/// Rounding is done according to the rounding parameter, which can be one of:
3790///
3791/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3792/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3793/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3794/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3795/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3796///
3797/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmul_round_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
    src: __m512h,
    k: __mmask16,
    a: __m512h,
    b: __m512h,
) -> __m512h {
    static_assert_rounding!(ROUNDING);
    // `fcmul` is an alias of `cmul`; both lower to VFCMULCPH.
    _mm512_mask_cmul_round_pch::<ROUNDING>(src, k, a, b)
}
3812
3813/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and
3814/// store the results in dst using zeromask k (the element is zeroed out when corresponding mask bit is not set).
3815/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3816/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3817///
3818/// Rounding is done according to the rounding parameter, which can be one of:
3819///
3820/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3821/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3822/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3823/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3824/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3825///
3826/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmul_round_pch)
3827#[inline]
3828#[target_feature(enable = "avx512fp16")]
3829#[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
3830#[rustc_legacy_const_generics(3)]
3831#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3832pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
3833 k: __mmask16,
3834 a: __m512h,
3835 b: __m512h,
3836) -> __m512h {
3837 static_assert_rounding!(ROUNDING);
3838 _mm512_maskz_cmul_round_pch::<ROUNDING>(k, a, b)
3839}
3840
3841/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3842/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3843/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3844/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3845///
3846/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_sch)
3847#[inline]
3848#[target_feature(enable = "avx512fp16")]
3849#[cfg_attr(test, assert_instr(vfcmulcsh))]
3850#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3851pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
3852 _mm_cmul_sch(a, b)
3853}
3854
3855/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3856/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3857/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3858/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3859///
3860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_sch)
3861#[inline]
3862#[target_feature(enable = "avx512fp16")]
3863#[cfg_attr(test, assert_instr(vfcmulcsh))]
3864#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3865pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3866 _mm_mask_cmul_sch(src, k, a, b)
3867}
3868
3869/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3870/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3871/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3872/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3873///
3874/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_sch)
3875#[inline]
3876#[target_feature(enable = "avx512fp16")]
3877#[cfg_attr(test, assert_instr(vfcmulcsh))]
3878#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3879pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
3880 _mm_maskz_cmul_sch(k, a, b)
3881}
3882
3883/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3884/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
3885/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
3886///
3887/// Rounding is done according to the rounding parameter, which can be one of:
3888///
3889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3894///
3895/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmul_round_sch)
3896#[inline]
3897#[target_feature(enable = "avx512fp16")]
3898#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3899#[rustc_legacy_const_generics(2)]
3900#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3901pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
3902 static_assert_rounding!(ROUNDING);
3903 _mm_cmul_round_sch::<ROUNDING>(a, b)
3904}
3905
3906/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3907/// and store the results in dst using writemask k (the element is copied from src when mask bit 0 is not set).
3908/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3909/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3910///
3911/// Rounding is done according to the rounding parameter, which can be one of:
3912///
3913/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3914/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3915/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3916/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3918///
3919/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmul_round_sch)
3920#[inline]
3921#[target_feature(enable = "avx512fp16")]
3922#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3923#[rustc_legacy_const_generics(4)]
3924#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3925pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
3926 src: __m128h,
3927 k: __mmask8,
3928 a: __m128h,
3929 b: __m128h,
3930) -> __m128h {
3931 static_assert_rounding!(ROUNDING);
3932 _mm_mask_cmul_round_sch::<ROUNDING>(src, k, a, b)
3933}
3934
3935/// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b,
3936/// and store the results in dst using zeromask k (the element is zeroed out when mask bit 0 is not set).
3937/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
3938/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
3939///
3940/// Rounding is done according to the rounding parameter, which can be one of:
3941///
3942/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3943/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3944/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3945/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3946/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3947///
3948/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmul_round_sch)
3949#[inline]
3950#[target_feature(enable = "avx512fp16")]
3951#[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
3952#[rustc_legacy_const_generics(3)]
3953#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3954pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
3955 k: __mmask8,
3956 a: __m128h,
3957 b: __m128h,
3958) -> __m128h {
3959 static_assert_rounding!(ROUNDING);
3960 _mm_maskz_cmul_round_sch::<ROUNDING>(k, a, b)
3961}
3962
3963/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3964/// the results in dst.
3965///
3966/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
3967#[inline]
3968#[target_feature(enable = "avx512fp16,avx512vl")]
3969#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3970pub fn _mm_abs_ph(v2: __m128h) -> __m128h {
3971 unsafe { transmute(src:_mm_and_si128(a:transmute(v2), b:_mm_set1_epi16(i16::MAX))) }
3972}
3973
3974/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3975/// the result in dst.
3976///
3977/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
3978#[inline]
3979#[target_feature(enable = "avx512fp16,avx512vl")]
3980#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3981pub fn _mm256_abs_ph(v2: __m256h) -> __m256h {
3982 unsafe { transmute(src:_mm256_and_si256(a:transmute(v2), b:_mm256_set1_epi16(i16::MAX))) }
3983}
3984
3985/// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing
3986/// the result in dst.
3987///
3988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
3989#[inline]
3990#[target_feature(enable = "avx512fp16")]
3991#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
3992pub fn _mm512_abs_ph(v2: __m512h) -> __m512h {
3993 unsafe { transmute(src:_mm512_and_si512(a:transmute(v2), b:_mm512_set1_epi16(i16::MAX))) }
3994}
3995
3996/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex
3997/// number is composed of two adjacent half-precision (16-bit) floating-point elements, which defines
3998/// the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate
3999/// `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4000///
4001/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
4002#[inline]
4003#[target_feature(enable = "avx512fp16,avx512vl")]
4004#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4005pub fn _mm_conj_pch(a: __m128h) -> __m128h {
4006 unsafe { transmute(src:_mm_xor_si128(a:transmute(a), b:_mm_set1_epi32(i32::MIN))) }
4007}
4008
4009/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4010/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4011/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4012/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4013///
4014/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
4015#[inline]
4016#[target_feature(enable = "avx512fp16,avx512vl")]
4017#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4018pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
4019 unsafe {
4020 let r: __m128 = transmute(src:_mm_conj_pch(a));
4021 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
4022 }
4023}
4024
4025/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4026/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4027/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4028/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4029///
4030/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
4031#[inline]
4032#[target_feature(enable = "avx512fp16,avx512vl")]
4033#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4034pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
4035 _mm_mask_conj_pch(src:_mm_setzero_ph(), k, a)
4036}
4037
4038/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4039/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4040/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4041///
4042/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
4043#[inline]
4044#[target_feature(enable = "avx512fp16,avx512vl")]
4045#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4046pub fn _mm256_conj_pch(a: __m256h) -> __m256h {
4047 unsafe { transmute(src:_mm256_xor_si256(a:transmute(a), b:_mm256_set1_epi32(i32::MIN))) }
4048}
4049
4050/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4051/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4052/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4053/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4054///
4055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
4056#[inline]
4057#[target_feature(enable = "avx512fp16,avx512vl")]
4058#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4059pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
4060 unsafe {
4061 let r: __m256 = transmute(src:_mm256_conj_pch(a));
4062 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
4063 }
4064}
4065
4066/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4067/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4068/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4069/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4070///
4071/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
4072#[inline]
4073#[target_feature(enable = "avx512fp16,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4075pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
4076 _mm256_mask_conj_pch(src:_mm256_setzero_ph(), k, a)
4077}
4078
4079/// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex number
4080/// is composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
4081/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4082///
4083/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
4084#[inline]
4085#[target_feature(enable = "avx512fp16")]
4086#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4087pub fn _mm512_conj_pch(a: __m512h) -> __m512h {
4088 unsafe { transmute(src:_mm512_xor_si512(a:transmute(a), b:_mm512_set1_epi32(i32::MIN))) }
4089}
4090
4091/// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k
4092/// (the element is copied from src when corresponding mask bit is not set). Each complex number is composed of two
4093/// adjacent half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4094/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4095///
4096/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
4097#[inline]
4098#[target_feature(enable = "avx512fp16")]
4099#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4100pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
4101 unsafe {
4102 let r: __m512 = transmute(src:_mm512_conj_pch(a));
4103 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src)))
4104 }
4105}
4106
4107/// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k
4108/// (the element is zeroed out when corresponding mask bit is not set). Each complex number is composed of two adjacent
4109/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4110/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4111///
4112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
4113#[inline]
4114#[target_feature(enable = "avx512fp16")]
4115#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4116pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
4117 _mm512_mask_conj_pch(src:_mm512_setzero_ph(), k, a)
4118}
4119
4120/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4121/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4122/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4123///
4124/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pch)
4125#[inline]
4126#[target_feature(enable = "avx512fp16,avx512vl")]
4127#[cfg_attr(test, assert_instr(vfmaddcph))]
4128#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4129pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4130 _mm_mask3_fmadd_pch(a, b, c, k:0xff)
4131}
4132
4133/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4134/// and store the results in dst using writemask k (the element is copied from a when the corresponding
4135/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4136/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4137///
4138/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_pch)
4139#[inline]
4140#[target_feature(enable = "avx512fp16,avx512vl")]
4141#[cfg_attr(test, assert_instr(vfmaddcph))]
4142#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4143pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4144 unsafe {
4145 let r: __m128 = transmute(src:_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4146 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4147 }
4148}
4149
4150/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4151/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4152/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4153/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4154///
4155/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_pch)
4156#[inline]
4157#[target_feature(enable = "avx512fp16,avx512vl")]
4158#[cfg_attr(test, assert_instr(vfmaddcph))]
4159#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4160pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4161 unsafe {
4162 transmute(src:vfmaddcph_mask3_128(
4163 a:transmute(a),
4164 b:transmute(b),
4165 c:transmute(src:c),
4166 k,
4167 ))
4168 }
4169}
4170
4171/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4172/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4173/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4174/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4175///
4176/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_pch)
4177#[inline]
4178#[target_feature(enable = "avx512fp16,avx512vl")]
4179#[cfg_attr(test, assert_instr(vfmaddcph))]
4180#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4181pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4182 unsafe {
4183 transmute(src:vfmaddcph_maskz_128(
4184 a:transmute(a),
4185 b:transmute(b),
4186 c:transmute(src:c),
4187 k,
4188 ))
4189 }
4190}
4191
4192/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4193/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4194/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4195///
4196/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pch)
4197#[inline]
4198#[target_feature(enable = "avx512fp16,avx512vl")]
4199#[cfg_attr(test, assert_instr(vfmaddcph))]
4200#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4201pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4202 _mm256_mask3_fmadd_pch(a, b, c, k:0xff)
4203}
4204
4205/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4206/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4207/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4208/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4209///
4210/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_pch)
4211#[inline]
4212#[target_feature(enable = "avx512fp16,avx512vl")]
4213#[cfg_attr(test, assert_instr(vfmaddcph))]
4214#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4215pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4216 unsafe {
4217 let r: __m256 = transmute(src:_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4218 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4219 }
4220}
4221
4222/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4223/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4224/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4225/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4226///
4227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_pch)
4228#[inline]
4229#[target_feature(enable = "avx512fp16,avx512vl")]
4230#[cfg_attr(test, assert_instr(vfmaddcph))]
4231#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4232pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4233 unsafe {
4234 transmute(src:vfmaddcph_mask3_256(
4235 a:transmute(a),
4236 b:transmute(b),
4237 c:transmute(src:c),
4238 k,
4239 ))
4240 }
4241}
4242
4243/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4244/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4245/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4246/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4247///
4248/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_pch)
4249#[inline]
4250#[target_feature(enable = "avx512fp16,avx512vl")]
4251#[cfg_attr(test, assert_instr(vfmaddcph))]
4252#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4253pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4254 unsafe {
4255 transmute(src:vfmaddcph_maskz_256(
4256 a:transmute(a),
4257 b:transmute(b),
4258 c:transmute(src:c),
4259 k,
4260 ))
4261 }
4262}
4263
4264/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4265/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4266/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4267///
4268/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pch)
4269#[inline]
4270#[target_feature(enable = "avx512fp16")]
4271#[cfg_attr(test, assert_instr(vfmaddcph))]
4272#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4273pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4274 _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
4275}
4276
4277/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4278/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4279/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4280/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4281///
4282/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pch)
4283#[inline]
4284#[target_feature(enable = "avx512fp16")]
4285#[cfg_attr(test, assert_instr(vfmaddcph))]
4286#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4287pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
4288 _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
4289}
4290
4291/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4292/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4293/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4294/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4295///
4296/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pch)
4297#[inline]
4298#[target_feature(enable = "avx512fp16")]
4299#[cfg_attr(test, assert_instr(vfmaddcph))]
4300#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4301pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
4302 _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
4303}
4304
4305/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4306/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4307/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4308/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4309///
4310/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pch)
4311#[inline]
4312#[target_feature(enable = "avx512fp16")]
4313#[cfg_attr(test, assert_instr(vfmaddcph))]
4314#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4315pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4316 _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
4317}
4318
4319/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4320/// and store the results in dst. Each complex number is composed of two adjacent half-precision (16-bit)
4321/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4322///
4323/// Rounding is done according to the rounding parameter, which can be one of:
4324///
4325/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4326/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4327/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4328/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4329/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4330///
4331/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pch)
4332#[inline]
4333#[target_feature(enable = "avx512fp16")]
4334#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4335#[rustc_legacy_const_generics(3)]
4336#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4337pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4338 static_assert_rounding!(ROUNDING);
4339 _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k:0xffff)
4340}
4341
4342/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4343/// and store the results in dst using writemask k (the element is copied from a when the corresponding mask
4344/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4345/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4346///
4347/// Rounding is done according to the rounding parameter, which can be one of:
4348///
4349/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4350/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4351/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4352/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4353/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4354///
4355/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pch)
4356#[inline]
4357#[target_feature(enable = "avx512fp16")]
4358#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4359#[rustc_legacy_const_generics(4)]
4360#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4361pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
4362 a: __m512h,
4363 k: __mmask16,
4364 b: __m512h,
4365 c: __m512h,
4366) -> __m512h {
4367 unsafe {
4368 static_assert_rounding!(ROUNDING);
4369 let r: __m512 = transmute(src:_mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does
4370 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4371 }
4372}
4373
4374/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4375/// and store the results in dst using writemask k (the element is copied from c when the corresponding
4376/// mask bit is not set). Each complex number is composed of two adjacent half-precision (16-bit)
4377/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4378///
4379/// Rounding is done according to the rounding parameter, which can be one of:
4380///
4381/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4382/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4383/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4384/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4385/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4386///
4387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pch)
4388#[inline]
4389#[target_feature(enable = "avx512fp16")]
4390#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4391#[rustc_legacy_const_generics(4)]
4392#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4393pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
4394 a: __m512h,
4395 b: __m512h,
4396 c: __m512h,
4397 k: __mmask16,
4398) -> __m512h {
4399 unsafe {
4400 static_assert_rounding!(ROUNDING);
4401 transmute(src:vfmaddcph_mask3_512(
4402 a:transmute(a),
4403 b:transmute(b),
4404 c:transmute(src:c),
4405 k,
4406 ROUNDING,
4407 ))
4408 }
4409}
4410
4411/// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c,
4412/// and store the results in dst using zeromask k (the element is zeroed out when the corresponding mask
4413/// bit is not set). Each complex number is composed of two adjacent half-precision (16-bit) floating-point
4414/// elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4415///
4416/// Rounding is done according to the rounding parameter, which can be one of:
4417///
4418/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4419/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4420/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4421/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4422/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4423///
4424/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pch)
4425#[inline]
4426#[target_feature(enable = "avx512fp16")]
4427#[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
4428#[rustc_legacy_const_generics(4)]
4429#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4430pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
4431 k: __mmask16,
4432 a: __m512h,
4433 b: __m512h,
4434 c: __m512h,
4435) -> __m512h {
4436 unsafe {
4437 static_assert_rounding!(ROUNDING);
4438 transmute(src:vfmaddcph_maskz_512(
4439 a:transmute(a),
4440 b:transmute(b),
4441 c:transmute(src:c),
4442 k,
4443 ROUNDING,
4444 ))
4445 }
4446}
4447
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst, and copy the upper 6 packed elements from a to the
/// upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}
4461
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using writemask k (elements are copied from a when
/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
}
4476
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using writemask k (elements are copied from c when
/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
}
4491
/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
}
4506
4507/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4508/// store the result in the lower elements of dst. Each complex number is composed of two adjacent
4509/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4510///
4511/// Rounding is done according to the rounding parameter, which can be one of:
4512///
4513/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4514/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4515/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4516/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4517/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4518///
4519/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sch)
4520#[inline]
4521#[target_feature(enable = "avx512fp16")]
4522#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4523#[rustc_legacy_const_generics(3)]
4524#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4525pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4526 unsafe {
4527 static_assert_rounding!(ROUNDING);
4528 transmute(src:vfmaddcsh_mask(
4529 a:transmute(a),
4530 b:transmute(b),
4531 c:transmute(c),
4532 k:0xff,
4533 ROUNDING,
4534 ))
4535 }
4536}
4537
4538/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4539/// store the result in the lower elements of dst using writemask k (elements are copied from a when
4540/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4541/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4542/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4543///
4544/// Rounding is done according to the rounding parameter, which can be one of:
4545///
4546/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4547/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4548/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4549/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4550/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4551///
4552/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sch)
4553#[inline]
4554#[target_feature(enable = "avx512fp16")]
4555#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4556#[rustc_legacy_const_generics(4)]
4557#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4558pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
4559 a: __m128h,
4560 k: __mmask8,
4561 b: __m128h,
4562 c: __m128h,
4563) -> __m128h {
4564 unsafe {
4565 static_assert_rounding!(ROUNDING);
4566 let a: __m128 = transmute(src:a);
4567 let r: __m128 = vfmaddcsh_mask(a, b:transmute(b), c:transmute(src:c), k, ROUNDING); // using `0xff` would have been fine here, but this is what CLang does
4568 transmute(src:_mm_mask_move_ss(src:a, k, a, b:r))
4569 }
4570}
4571
4572/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4573/// store the result in the lower elements of dst using writemask k (elements are copied from c when
4574/// mask bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst.
4575/// Each complex number is composed of two adjacent half-precision (16-bit) floating-point elements,
4576/// which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4577///
4578/// Rounding is done according to the rounding parameter, which can be one of:
4579///
4580/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4581/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4582/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4583/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4584/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4585///
4586/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sch)
4587#[inline]
4588#[target_feature(enable = "avx512fp16")]
4589#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4590#[rustc_legacy_const_generics(4)]
4591#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4592pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
4593 a: __m128h,
4594 b: __m128h,
4595 c: __m128h,
4596 k: __mmask8,
4597) -> __m128h {
4598 unsafe {
4599 static_assert_rounding!(ROUNDING);
4600 let c: __m128 = transmute(src:c);
4601 let r: __m128 = vfmaddcsh_mask(a:transmute(a), b:transmute(src:b), c, k, ROUNDING);
4602 transmute(src:_mm_move_ss(a:c, b:r))
4603 }
4604}
4605
4606/// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and
4607/// store the result in the lower elements of dst using zeromask k (elements are zeroed out when mask
4608/// bit 0 is not set), and copy the upper 6 packed elements from a to the upper elements of dst. Each
4609/// complex number is composed of two adjacent half-precision (16-bit) floating-point elements, which
4610/// defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`.
4611///
4612/// Rounding is done according to the rounding parameter, which can be one of:
4613///
4614/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4615/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4616/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4617/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4618/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4619///
4620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sch)
4621#[inline]
4622#[target_feature(enable = "avx512fp16")]
4623#[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
4624#[rustc_legacy_const_generics(4)]
4625#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4626pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
4627 k: __mmask8,
4628 a: __m128h,
4629 b: __m128h,
4630 c: __m128h,
4631) -> __m128h {
4632 unsafe {
4633 static_assert_rounding!(ROUNDING);
4634 transmute(src:vfmaddcsh_maskz(
4635 a:transmute(a),
4636 b:transmute(b),
4637 c:transmute(src:c),
4638 k,
4639 ROUNDING,
4640 ))
4641 }
4642}
4643
4644/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4645/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4646/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4647/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4648///
4649/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_pch)
4650#[inline]
4651#[target_feature(enable = "avx512fp16,avx512vl")]
4652#[cfg_attr(test, assert_instr(vfcmaddcph))]
4653#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4654pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4655 _mm_mask3_fcmadd_pch(a, b, c, k:0xff)
4656}
4657
4658/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4659/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4660/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4661/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4662/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4663///
4664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_pch)
4665#[inline]
4666#[target_feature(enable = "avx512fp16,avx512vl")]
4667#[cfg_attr(test, assert_instr(vfcmaddcph))]
4668#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4669pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
4670 unsafe {
4671 let r: __m128 = transmute(src:_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4672 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4673 }
4674}
4675
4676/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4677/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4678/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4679/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4680/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4681///
4682/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_pch)
4683#[inline]
4684#[target_feature(enable = "avx512fp16,avx512vl")]
4685#[cfg_attr(test, assert_instr(vfcmaddcph))]
4686#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4687pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
4688 unsafe {
4689 transmute(src:vfcmaddcph_mask3_128(
4690 a:transmute(a),
4691 b:transmute(b),
4692 c:transmute(src:c),
4693 k,
4694 ))
4695 }
4696}
4697
4698/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4699/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4700/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4701/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4702/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4703///
4704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_pch)
4705#[inline]
4706#[target_feature(enable = "avx512fp16,avx512vl")]
4707#[cfg_attr(test, assert_instr(vfcmaddcph))]
4708#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4709pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
4710 unsafe {
4711 transmute(src:vfcmaddcph_maskz_128(
4712 a:transmute(a),
4713 b:transmute(b),
4714 c:transmute(src:c),
4715 k,
4716 ))
4717 }
4718}
4719
4720/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4721/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4722/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4723/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4724///
4725/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fcmadd_pch)
4726#[inline]
4727#[target_feature(enable = "avx512fp16,avx512vl")]
4728#[cfg_attr(test, assert_instr(vfcmaddcph))]
4729#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4730pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4731 _mm256_mask3_fcmadd_pch(a, b, c, k:0xff)
4732}
4733
4734/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4735/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4736/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4737/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4738/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4739///
4740/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fcmadd_pch)
4741#[inline]
4742#[target_feature(enable = "avx512fp16,avx512vl")]
4743#[cfg_attr(test, assert_instr(vfcmaddcph))]
4744#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4745pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
4746 unsafe {
4747 let r: __m256 = transmute(src:_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
4748 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4749 }
4750}
4751
4752/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4753/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4754/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4755/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4756/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4757///
4758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fcmadd_pch)
4759#[inline]
4760#[target_feature(enable = "avx512fp16,avx512vl")]
4761#[cfg_attr(test, assert_instr(vfcmaddcph))]
4762#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4763pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
4764 unsafe {
4765 transmute(src:vfcmaddcph_mask3_256(
4766 a:transmute(a),
4767 b:transmute(b),
4768 c:transmute(src:c),
4769 k,
4770 ))
4771 }
4772}
4773
4774/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4775/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
4776/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4777/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4778/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4779///
4780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fcmadd_pch)
4781#[inline]
4782#[target_feature(enable = "avx512fp16,avx512vl")]
4783#[cfg_attr(test, assert_instr(vfcmaddcph))]
4784#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4785pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
4786 unsafe {
4787 transmute(src:vfcmaddcph_maskz_256(
4788 a:transmute(a),
4789 b:transmute(b),
4790 c:transmute(src:c),
4791 k,
4792 ))
4793 }
4794}
4795
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}
4809
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
}
4824
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
/// copied from c when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
}
4839
/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
/// to the corresponding complex numbers in c, and store the results in dst using zeromask k (the element is
/// zeroed out when the corresponding mask bit is not set). Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_pch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
}
4854
4855/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4856/// to the corresponding complex numbers in c, and store the results in dst. Each complex number is composed
4857/// of two adjacent half-precision (16-bit) floating-point elements, which defines the complex number
4858/// `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4859///
4860/// Rounding is done according to the rounding parameter, which can be one of:
4861///
4862/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4863/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4864/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4865/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4866/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4867///
4868/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fcmadd_round_pch)
4869#[inline]
4870#[target_feature(enable = "avx512fp16")]
4871#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4872#[rustc_legacy_const_generics(3)]
4873#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4874pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
4875 static_assert_rounding!(ROUNDING);
4876 _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k:0xffff)
4877}
4878
4879/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4880/// to the corresponding complex numbers in c, and store the results in dst using writemask k (the element is
4881/// copied from a when the corresponding mask bit is not set). Each complex number is composed of two adjacent
4882/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
4883/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4884///
4885/// Rounding is done according to the rounding parameter, which can be one of:
4886///
4887/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4888/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4889/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4890/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4891/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4892///
4893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fcmadd_round_pch)
4894#[inline]
4895#[target_feature(enable = "avx512fp16")]
4896#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4897#[rustc_legacy_const_generics(4)]
4898#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4899pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
4900 a: __m512h,
4901 k: __mmask16,
4902 b: __m512h,
4903 c: __m512h,
4904) -> __m512h {
4905 unsafe {
4906 static_assert_rounding!(ROUNDING);
4907 let r: __m512 = transmute(src:_mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does
4908 transmute(src:simd_select_bitmask(m:k, yes:r, no:transmute(src:a)))
4909 }
4910}
4911
4912/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4913/// to the corresponding complex numbers in c using writemask k (the element is copied from c when the corresponding
4914/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
4915/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1`, or the complex
4916/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4917///
4918/// Rounding is done according to the rounding parameter, which can be one of:
4919///
4920/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4921/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4922/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4923/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4924/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4925///
4926/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fcmadd_round_pch)
4927#[inline]
4928#[target_feature(enable = "avx512fp16")]
4929#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4930#[rustc_legacy_const_generics(4)]
4931#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4932pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
4933 a: __m512h,
4934 b: __m512h,
4935 c: __m512h,
4936 k: __mmask16,
4937) -> __m512h {
4938 unsafe {
4939 static_assert_rounding!(ROUNDING);
4940 transmute(src:vfcmaddcph_mask3_512(
4941 a:transmute(a),
4942 b:transmute(b),
4943 c:transmute(src:c),
4944 k,
4945 ROUNDING,
4946 ))
4947 }
4948}
4949
4950/// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate
4951/// to the corresponding complex numbers in c using zeromask k (the element is zeroed out when the corresponding
4952/// mask bit is not set), and store the results in dst. Each complex number is composed of two adjacent half-precision
4953/// (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1`, or the complex
4954/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
4955///
4956/// Rounding is done according to the rounding parameter, which can be one of:
4957///
4958/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
4959/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
4960/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
4961/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
4962/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
4963///
4964/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fcmadd_round_pch)
4965#[inline]
4966#[target_feature(enable = "avx512fp16")]
4967#[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
4968#[rustc_legacy_const_generics(4)]
4969#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
4970pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
4971 k: __mmask16,
4972 a: __m512h,
4973 b: __m512h,
4974 c: __m512h,
4975) -> __m512h {
4976 unsafe {
4977 static_assert_rounding!(ROUNDING);
4978 transmute(src:vfcmaddcph_maskz_512(
4979 a:transmute(a),
4980 b:transmute(b),
4981 c:transmute(src:c),
4982 k,
4983 ROUNDING,
4984 ))
4985 }
4986}
4987
/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Delegate to the rounding variant, using the current MXCSR rounding mode.
    _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
}
5002
/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
    // Delegate to the rounding variant, using the current MXCSR rounding mode.
    _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
}
5018
/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
    // Delegate to the rounding variant, using the current MXCSR rounding mode.
    _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
}
5034
/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
/// zeromask k (the element is zeroed out when the corresponding mask bit is not set), and copy the upper
/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_sch)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfcmaddcsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Delegate to the rounding variant, using the current MXCSR rounding mode.
    _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
}
5050
5051/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5052/// accumulate to the lower complex number in c, and store the result in the lower elements of dst,
5053/// and copy the upper 6 packed elements from a to the upper elements of dst. Each complex number is
5054/// composed of two adjacent half-precision (16-bit) floating-point elements, which defines the complex
5055/// number `complex = vec.fp16[0] + i * vec.fp16[1]`, or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5056///
5057/// Rounding is done according to the rounding parameter, which can be one of:
5058///
5059/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5060/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5061/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5062/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5063/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5064///
5065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fcmadd_round_sch)
5066#[inline]
5067#[target_feature(enable = "avx512fp16")]
5068#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5069#[rustc_legacy_const_generics(3)]
5070#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5071pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5072 unsafe {
5073 static_assert_rounding!(ROUNDING);
5074 transmute(src:vfcmaddcsh_mask(
5075 a:transmute(a),
5076 b:transmute(b),
5077 c:transmute(c),
5078 k:0xff,
5079 ROUNDING,
5080 ))
5081 }
5082}
5083
5084/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5085/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5086/// writemask k (the element is copied from a when the corresponding mask bit is not set), and copy the upper
5087/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5088/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5089/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5090///
5091/// Rounding is done according to the rounding parameter, which can be one of:
5092///
5093/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5094/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5095/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5096/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5097/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5098///
5099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fcmadd_round_sch)
5100#[inline]
5101#[target_feature(enable = "avx512fp16")]
5102#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5103#[rustc_legacy_const_generics(4)]
5104#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5105pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
5106 a: __m128h,
5107 k: __mmask8,
5108 b: __m128h,
5109 c: __m128h,
5110) -> __m128h {
5111 unsafe {
5112 static_assert_rounding!(ROUNDING);
5113 let a: __m128 = transmute(src:a);
5114 let r: __m128 = vfcmaddcsh_mask(a, b:transmute(b), c:transmute(src:c), k, ROUNDING);
5115 transmute(src:_mm_mask_move_ss(src:a, k, a, b:r))
5116 }
5117}
5118
5119/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5120/// accumulate to the lower complex number in c, and store the result in the lower elements of dst using
5121/// writemask k (the element is copied from c when the corresponding mask bit is not set), and copy the upper
5122/// 6 packed elements from a to the upper elements of dst. Each complex number is composed of two adjacent
5123/// half-precision (16-bit) floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1]`,
5124/// or the complex conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5125///
5126/// Rounding is done according to the rounding parameter, which can be one of:
5127///
5128/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5129/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5130/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5131/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5132/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5133///
5134/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fcmadd_round_sch)
5135#[inline]
5136#[target_feature(enable = "avx512fp16")]
5137#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5138#[rustc_legacy_const_generics(4)]
5139#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5140pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
5141 a: __m128h,
5142 b: __m128h,
5143 c: __m128h,
5144 k: __mmask8,
5145) -> __m128h {
5146 unsafe {
5147 static_assert_rounding!(ROUNDING);
5148 let c: __m128 = transmute(src:c);
5149 let r: __m128 = vfcmaddcsh_mask(a:transmute(a), b:transmute(src:b), c, k, ROUNDING);
5150 transmute(src:_mm_move_ss(a:c, b:r))
5151 }
5152}
5153
5154/// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b,
5155/// accumulate to the lower complex number in c using zeromask k (the element is zeroed out when the corresponding
5156/// mask bit is not set), and store the result in the lower elements of dst, and copy the upper 6 packed elements
5157/// from a to the upper elements of dst. Each complex number is composed of two adjacent half-precision (16-bit)
5158/// floating-point elements, which defines the complex number `complex = vec.fp16[0] + i * vec.fp16[1`, or the complex
5159/// conjugate `conjugate = vec.fp16[0] - i * vec.fp16[1]`.
5160///
5161/// Rounding is done according to the rounding parameter, which can be one of:
5162///
5163/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5164/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5165/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5166/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5167/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5168///
5169/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fcmadd_round_sch)
5170#[inline]
5171#[target_feature(enable = "avx512fp16")]
5172#[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
5173#[rustc_legacy_const_generics(4)]
5174#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5175pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
5176 k: __mmask8,
5177 a: __m128h,
5178 b: __m128h,
5179 c: __m128h,
5180) -> __m128h {
5181 unsafe {
5182 static_assert_rounding!(ROUNDING);
5183 transmute(src:vfcmaddcsh_maskz(
5184 a:transmute(a),
5185 b:transmute(b),
5186 c:transmute(src:c),
5187 k,
5188 ROUNDING,
5189 ))
5190 }
5191}
5192
5193/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5194/// result to packed elements in c, and store the results in dst.
5195///
5196/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ph)
5197#[inline]
5198#[target_feature(enable = "avx512fp16,avx512vl")]
5199#[cfg_attr(test, assert_instr(vfmadd))]
5200#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5201pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5202 unsafe { simd_fma(x:a, y:b, z:c) }
5203}
5204
5205/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5206/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5207/// from a when the corresponding mask bit is not set).
5208///
5209/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ph)
5210#[inline]
5211#[target_feature(enable = "avx512fp16,avx512vl")]
5212#[cfg_attr(test, assert_instr(vfmadd))]
5213#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5214pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5215 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:a) }
5216}
5217
5218/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5219/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5220/// from c when the corresponding mask bit is not set).
5221///
5222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ph)
5223#[inline]
5224#[target_feature(enable = "avx512fp16,avx512vl")]
5225#[cfg_attr(test, assert_instr(vfmadd))]
5226#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5227pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5228 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:c) }
5229}
5230
5231/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5232/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5233/// out when the corresponding mask bit is not set).
5234///
5235/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ph)
5236#[inline]
5237#[target_feature(enable = "avx512fp16,avx512vl")]
5238#[cfg_attr(test, assert_instr(vfmadd))]
5239#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5240pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5241 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ph(a, b, c), no:_mm_setzero_ph()) }
5242}
5243
5244/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5245/// result to packed elements in c, and store the results in dst.
5246///
5247/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ph)
5248#[inline]
5249#[target_feature(enable = "avx512fp16,avx512vl")]
5250#[cfg_attr(test, assert_instr(vfmadd))]
5251#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5252pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5253 unsafe { simd_fma(x:a, y:b, z:c) }
5254}
5255
5256/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5257/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5258/// from a when the corresponding mask bit is not set).
5259///
5260/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmadd_ph)
5261#[inline]
5262#[target_feature(enable = "avx512fp16,avx512vl")]
5263#[cfg_attr(test, assert_instr(vfmadd))]
5264#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5265pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
5266 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:a) }
5267}
5268
5269/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5270/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5271/// from c when the corresponding mask bit is not set).
5272///
5273/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmadd_ph)
5274#[inline]
5275#[target_feature(enable = "avx512fp16,avx512vl")]
5276#[cfg_attr(test, assert_instr(vfmadd))]
5277#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5278pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
5279 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:c) }
5280}
5281
5282/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5283/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5284/// out when the corresponding mask bit is not set).
5285///
5286/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmadd_ph)
5287#[inline]
5288#[target_feature(enable = "avx512fp16,avx512vl")]
5289#[cfg_attr(test, assert_instr(vfmadd))]
5290#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5291pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5292 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ph(a, b, c), no:_mm256_setzero_ph()) }
5293}
5294
5295/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5296/// result to packed elements in c, and store the results in dst.
5297///
5298/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ph)
5299#[inline]
5300#[target_feature(enable = "avx512fp16")]
5301#[cfg_attr(test, assert_instr(vfmadd))]
5302#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5303pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5304 unsafe { simd_fma(x:a, y:b, z:c) }
5305}
5306
5307/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5308/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5309/// from a when the corresponding mask bit is not set).
5310///
5311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ph)
5312#[inline]
5313#[target_feature(enable = "avx512fp16")]
5314#[cfg_attr(test, assert_instr(vfmadd))]
5315#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5316pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
5317 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:a) }
5318}
5319
5320/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5321/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5322/// from c when the corresponding mask bit is not set).
5323///
5324/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ph)
5325#[inline]
5326#[target_feature(enable = "avx512fp16")]
5327#[cfg_attr(test, assert_instr(vfmadd))]
5328#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5329pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
5330 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:c) }
5331}
5332
5333/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5334/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5335/// out when the corresponding mask bit is not set).
5336///
5337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ph)
5338#[inline]
5339#[target_feature(enable = "avx512fp16")]
5340#[cfg_attr(test, assert_instr(vfmadd))]
5341#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5342pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5343 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ph(a, b, c), no:_mm512_setzero_ph()) }
5344}
5345
/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
/// result to packed elements in c, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    unsafe {
        // Validate the rounding-mode constant at compile time, then forward to
        // the rounding-aware LLVM intrinsic.
        static_assert_rounding!(ROUNDING);
        vfmaddph_512(a, b, c, ROUNDING)
    }
}
5369
5370/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5371/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5372/// from a when the corresponding mask bit is not set).
5373///
5374/// Rounding is done according to the rounding parameter, which can be one of:
5375///
5376/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5377/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5378/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5379/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5380/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5381///
5382/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ph)
5383#[inline]
5384#[target_feature(enable = "avx512fp16")]
5385#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5386#[rustc_legacy_const_generics(4)]
5387#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5388pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
5389 a: __m512h,
5390 k: __mmask32,
5391 b: __m512h,
5392 c: __m512h,
5393) -> __m512h {
5394 unsafe {
5395 static_assert_rounding!(ROUNDING);
5396 simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:a)
5397 }
5398}
5399
5400/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5401/// result to packed elements in c, and store the results in dst using writemask k (the element is copied
5402/// from c when the corresponding mask bit is not set).
5403///
5404/// Rounding is done according to the rounding parameter, which can be one of:
5405///
5406/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5407/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5408/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5409/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5410/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5411///
5412/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ph)
5413#[inline]
5414#[target_feature(enable = "avx512fp16")]
5415#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5416#[rustc_legacy_const_generics(4)]
5417#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5418pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
5419 a: __m512h,
5420 b: __m512h,
5421 c: __m512h,
5422 k: __mmask32,
5423) -> __m512h {
5424 unsafe {
5425 static_assert_rounding!(ROUNDING);
5426 simd_select_bitmask(m:k, yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c), no:c)
5427 }
5428}
5429
5430/// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate
5431/// result to packed elements in c, and store the results in dst using zeromask k (the element is zeroed
5432/// out when the corresponding mask bit is not set).
5433///
5434/// Rounding is done according to the rounding parameter, which can be one of:
5435///
5436/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5437/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5438/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5439/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5440/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5441///
5442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ph)
5443#[inline]
5444#[target_feature(enable = "avx512fp16")]
5445#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5446#[rustc_legacy_const_generics(4)]
5447#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5448pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
5449 k: __mmask32,
5450 a: __m512h,
5451 b: __m512h,
5452 c: __m512h,
5453) -> __m512h {
5454 unsafe {
5455 static_assert_rounding!(ROUNDING);
5456 simd_select_bitmask(
5457 m:k,
5458 yes:_mm512_fmadd_round_ph::<ROUNDING>(a, b, c),
5459 no:_mm512_setzero_ph(),
5460 )
5461 }
5462}
5463
5464/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5465/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5466/// 7 packed elements from a to the upper elements of dst.
5467///
5468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sh)
5469#[inline]
5470#[target_feature(enable = "avx512fp16")]
5471#[cfg_attr(test, assert_instr(vfmadd))]
5472#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5473pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5474 unsafe {
5475 let extracta: f16 = simd_extract!(a, 0);
5476 let extractb: f16 = simd_extract!(b, 0);
5477 let extractc: f16 = simd_extract!(c, 0);
5478 let r: f16 = fmaf16(a:extracta, b:extractb, c:extractc);
5479 simd_insert!(a, 0, r)
5480 }
5481}
5482
5483/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5484/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5485/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5486/// upper elements of dst.
5487///
5488/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sh)
5489#[inline]
5490#[target_feature(enable = "avx512fp16")]
5491#[cfg_attr(test, assert_instr(vfmadd))]
5492#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5493pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5494 unsafe {
5495 let mut fmadd: f16 = simd_extract!(a, 0);
5496 if k & 1 != 0 {
5497 let extractb: f16 = simd_extract!(b, 0);
5498 let extractc: f16 = simd_extract!(c, 0);
5499 fmadd = fmaf16(a:fmadd, b:extractb, c:extractc);
5500 }
5501 simd_insert!(a, 0, fmadd)
5502 }
5503}
5504
5505/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5506/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5507/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5508/// upper elements of dst.
5509///
5510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sh)
5511#[inline]
5512#[target_feature(enable = "avx512fp16")]
5513#[cfg_attr(test, assert_instr(vfmadd))]
5514#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5515pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5516 unsafe {
5517 let mut fmadd: f16 = simd_extract!(c, 0);
5518 if k & 1 != 0 {
5519 let extracta: f16 = simd_extract!(a, 0);
5520 let extractb: f16 = simd_extract!(b, 0);
5521 fmadd = fmaf16(a:extracta, b:extractb, c:fmadd);
5522 }
5523 simd_insert!(c, 0, fmadd)
5524 }
5525}
5526
5527/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5528/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5529/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5530/// upper elements of dst.
5531///
5532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sh)
5533#[inline]
5534#[target_feature(enable = "avx512fp16")]
5535#[cfg_attr(test, assert_instr(vfmadd))]
5536#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5537pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5538 unsafe {
5539 let mut fmadd: f16 = 0.0;
5540 if k & 1 != 0 {
5541 let extracta: f16 = simd_extract!(a, 0);
5542 let extractb: f16 = simd_extract!(b, 0);
5543 let extractc: f16 = simd_extract!(c, 0);
5544 fmadd = fmaf16(a:extracta, b:extractb, c:extractc);
5545 }
5546 simd_insert!(a, 0, fmadd)
5547 }
5548}
5549
5550/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5551/// result to the lower element in c. Store the result in the lower element of dst, and copy the upper
5552/// 7 packed elements from a to the upper elements of dst.
5553///
5554/// Rounding is done according to the rounding parameter, which can be one of:
5555///
5556/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5557/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5558/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5559/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5560/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5561///
5562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sh)
5563#[inline]
5564#[target_feature(enable = "avx512fp16")]
5565#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5566#[rustc_legacy_const_generics(3)]
5567#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5568pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5569 unsafe {
5570 static_assert_rounding!(ROUNDING);
5571 let extracta: f16 = simd_extract!(a, 0);
5572 let extractb: f16 = simd_extract!(b, 0);
5573 let extractc: f16 = simd_extract!(c, 0);
5574 let r: f16 = vfmaddsh(a:extracta, b:extractb, c:extractc, ROUNDING);
5575 simd_insert!(a, 0, r)
5576 }
5577}
5578
5579/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5580/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5581/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5582/// upper elements of dst.
5583///
5584/// Rounding is done according to the rounding parameter, which can be one of:
5585///
5586/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5587/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5588/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5589/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5590/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5591///
5592/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sh)
5593#[inline]
5594#[target_feature(enable = "avx512fp16")]
5595#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5596#[rustc_legacy_const_generics(4)]
5597#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5598pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
5599 a: __m128h,
5600 k: __mmask8,
5601 b: __m128h,
5602 c: __m128h,
5603) -> __m128h {
5604 unsafe {
5605 static_assert_rounding!(ROUNDING);
5606 let mut fmadd: f16 = simd_extract!(a, 0);
5607 if k & 1 != 0 {
5608 let extractb: f16 = simd_extract!(b, 0);
5609 let extractc: f16 = simd_extract!(c, 0);
5610 fmadd = vfmaddsh(a:fmadd, b:extractb, c:extractc, ROUNDING);
5611 }
5612 simd_insert!(a, 0, fmadd)
5613 }
5614}
5615
5616/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5617/// result to the lower element in c. Store the result in the lower element of dst using writemask k (the element
5618/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
5619/// upper elements of dst.
5620///
5621/// Rounding is done according to the rounding parameter, which can be one of:
5622///
5623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5628///
5629/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sh)
5630#[inline]
5631#[target_feature(enable = "avx512fp16")]
5632#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5633#[rustc_legacy_const_generics(4)]
5634#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5635pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
5636 a: __m128h,
5637 b: __m128h,
5638 c: __m128h,
5639 k: __mmask8,
5640) -> __m128h {
5641 unsafe {
5642 static_assert_rounding!(ROUNDING);
5643 let mut fmadd: f16 = simd_extract!(c, 0);
5644 if k & 1 != 0 {
5645 let extracta: f16 = simd_extract!(a, 0);
5646 let extractb: f16 = simd_extract!(b, 0);
5647 fmadd = vfmaddsh(a:extracta, b:extractb, c:fmadd, ROUNDING);
5648 }
5649 simd_insert!(c, 0, fmadd)
5650 }
5651}
5652
5653/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate
5654/// result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element
5655/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5656/// upper elements of dst.
5657///
5658/// Rounding is done according to the rounding parameter, which can be one of:
5659///
5660/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5661/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5662/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5663/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5664/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5665///
5666/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sh)
5667#[inline]
5668#[target_feature(enable = "avx512fp16")]
5669#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
5670#[rustc_legacy_const_generics(4)]
5671#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5672pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
5673 k: __mmask8,
5674 a: __m128h,
5675 b: __m128h,
5676 c: __m128h,
5677) -> __m128h {
5678 unsafe {
5679 static_assert_rounding!(ROUNDING);
5680 let mut fmadd: f16 = 0.0;
5681 if k & 1 != 0 {
5682 let extracta: f16 = simd_extract!(a, 0);
5683 let extractb: f16 = simd_extract!(b, 0);
5684 let extractc: f16 = simd_extract!(c, 0);
5685 fmadd = vfmaddsh(a:extracta, b:extractb, c:extractc, ROUNDING);
5686 }
5687 simd_insert!(a, 0, fmadd)
5688 }
5689}
5690
5691/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5692/// in c from the intermediate result, and store the results in dst.
5693/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5694///
5695/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ph)
5696#[inline]
5697#[target_feature(enable = "avx512fp16,avx512vl")]
5698#[cfg_attr(test, assert_instr(vfmsub))]
5699#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5700pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5701 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5702}
5703
5704/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5705/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5706/// from a when the corresponding mask bit is not set).
5707///
5708/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ph)
5709#[inline]
5710#[target_feature(enable = "avx512fp16,avx512vl")]
5711#[cfg_attr(test, assert_instr(vfmsub))]
5712#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5713pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5714 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:a) }
5715}
5716
5717/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5718/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5719/// from c when the corresponding mask bit is not set).
5720///
5721/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ph)
5722#[inline]
5723#[target_feature(enable = "avx512fp16,avx512vl")]
5724#[cfg_attr(test, assert_instr(vfmsub))]
5725#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5726pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
5727 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:c) }
5728}
5729
5730/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5731/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5732/// out when the corresponding mask bit is not set).
5733///
5734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ph)
5735#[inline]
5736#[target_feature(enable = "avx512fp16,avx512vl")]
5737#[cfg_attr(test, assert_instr(vfmsub))]
5738#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5739pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5740 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ph(a, b, c), no:_mm_setzero_ph()) }
5741}
5742
5743/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5744/// in c from the intermediate result, and store the results in dst.
5745///
5746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ph)
5747#[inline]
5748#[target_feature(enable = "avx512fp16,avx512vl")]
5749#[cfg_attr(test, assert_instr(vfmsub))]
5750#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5751pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5752 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5753}
5754
5755/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5756/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5757/// from a when the corresponding mask bit is not set).
5758///
5759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsub_ph)
5760#[inline]
5761#[target_feature(enable = "avx512fp16,avx512vl")]
5762#[cfg_attr(test, assert_instr(vfmsub))]
5763#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5764pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
5765 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:a) }
5766}
5767
5768/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5769/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5770/// from c when the corresponding mask bit is not set).
5771///
5772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsub_ph)
5773#[inline]
5774#[target_feature(enable = "avx512fp16,avx512vl")]
5775#[cfg_attr(test, assert_instr(vfmsub))]
5776#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5777pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
5778 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:c) }
5779}
5780
5781/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5782/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5783/// out when the corresponding mask bit is not set).
5784///
5785/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsub_ph)
5786#[inline]
5787#[target_feature(enable = "avx512fp16,avx512vl")]
5788#[cfg_attr(test, assert_instr(vfmsub))]
5789#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5790pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
5791 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ph(a, b, c), no:_mm256_setzero_ph()) }
5792}
5793
5794/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5795/// in c from the intermediate result, and store the results in dst.
5796///
5797/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ph)
5798#[inline]
5799#[target_feature(enable = "avx512fp16")]
5800#[cfg_attr(test, assert_instr(vfmsub))]
5801#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5802pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5803 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
5804}
5805
5806/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5807/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5808/// from a when the corresponding mask bit is not set).
5809///
5810/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ph)
5811#[inline]
5812#[target_feature(enable = "avx512fp16")]
5813#[cfg_attr(test, assert_instr(vfmsub))]
5814#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5815pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
5816 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:a) }
5817}
5818
5819/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5820/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5821/// from c when the corresponding mask bit is not set).
5822///
5823/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ph)
5824#[inline]
5825#[target_feature(enable = "avx512fp16")]
5826#[cfg_attr(test, assert_instr(vfmsub))]
5827#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5828pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
5829 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:c) }
5830}
5831
5832/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5833/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5834/// out when the corresponding mask bit is not set).
5835///
5836/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ph)
5837#[inline]
5838#[target_feature(enable = "avx512fp16")]
5839#[cfg_attr(test, assert_instr(vfmsub))]
5840#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5841pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5842 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ph(a, b, c), no:_mm512_setzero_ph()) }
5843}
5844
5845/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5846/// in c from the intermediate result, and store the results in dst.
5847///
5848/// Rounding is done according to the rounding parameter, which can be one of:
5849///
5850/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5851/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5852/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5853/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5854/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5855///
5856/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ph)
5857#[inline]
5858#[target_feature(enable = "avx512fp16")]
5859#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5860#[rustc_legacy_const_generics(3)]
5861#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5862pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
5863 unsafe {
5864 static_assert_rounding!(ROUNDING);
5865 vfmaddph_512(a, b, c:simd_neg(c), ROUNDING)
5866 }
5867}
5868
5869/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5870/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5871/// from a when the corresponding mask bit is not set).
5872///
5873/// Rounding is done according to the rounding parameter, which can be one of:
5874///
5875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5880///
5881/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ph)
5882#[inline]
5883#[target_feature(enable = "avx512fp16")]
5884#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5885#[rustc_legacy_const_generics(4)]
5886#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5887pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
5888 a: __m512h,
5889 k: __mmask32,
5890 b: __m512h,
5891 c: __m512h,
5892) -> __m512h {
5893 unsafe {
5894 static_assert_rounding!(ROUNDING);
5895 simd_select_bitmask(m:k, yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c), no:a)
5896 }
5897}
5898
5899/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5900/// in c from the intermediate result, and store the results in dst using writemask k (the element is copied
5901/// from c when the corresponding mask bit is not set).
5902///
5903/// Rounding is done according to the rounding parameter, which can be one of:
5904///
5905/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5906/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5907/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5908/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5909/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5910///
5911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ph)
5912#[inline]
5913#[target_feature(enable = "avx512fp16")]
5914#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5915#[rustc_legacy_const_generics(4)]
5916#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5917pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
5918 a: __m512h,
5919 b: __m512h,
5920 c: __m512h,
5921 k: __mmask32,
5922) -> __m512h {
5923 unsafe {
5924 static_assert_rounding!(ROUNDING);
5925 simd_select_bitmask(m:k, yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c), no:c)
5926 }
5927}
5928
5929/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
5930/// in c from the intermediate result, and store the results in dst using zeromask k (the element is zeroed
5931/// out when the corresponding mask bit is not set).
5932///
5933/// Rounding is done according to the rounding parameter, which can be one of:
5934///
5935/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
5936/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
5937/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
5938/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
5939/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5940///
5941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ph)
5942#[inline]
5943#[target_feature(enable = "avx512fp16")]
5944#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
5945#[rustc_legacy_const_generics(4)]
5946#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5947pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
5948 k: __mmask32,
5949 a: __m512h,
5950 b: __m512h,
5951 c: __m512h,
5952) -> __m512h {
5953 unsafe {
5954 static_assert_rounding!(ROUNDING);
5955 simd_select_bitmask(
5956 m:k,
5957 yes:_mm512_fmsub_round_ph::<ROUNDING>(a, b, c),
5958 no:_mm512_setzero_ph(),
5959 )
5960 }
5961}
5962
5963/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5964/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
5965/// 7 packed elements from a to the upper elements of dst.
5966///
5967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sh)
5968#[inline]
5969#[target_feature(enable = "avx512fp16")]
5970#[cfg_attr(test, assert_instr(vfmsub))]
5971#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5972pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
5973 unsafe {
5974 let extracta: f16 = simd_extract!(a, 0);
5975 let extractb: f16 = simd_extract!(b, 0);
5976 let extractc: f16 = simd_extract!(c, 0);
5977 let r: f16 = fmaf16(a:extracta, b:extractb, -extractc);
5978 simd_insert!(a, 0, r)
5979 }
5980}
5981
5982/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
5983/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
5984/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
5985/// upper elements of dst.
5986///
5987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sh)
5988#[inline]
5989#[target_feature(enable = "avx512fp16")]
5990#[cfg_attr(test, assert_instr(vfmsub))]
5991#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
5992pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
5993 unsafe {
5994 let mut fmsub: f16 = simd_extract!(a, 0);
5995 if k & 1 != 0 {
5996 let extractb: f16 = simd_extract!(b, 0);
5997 let extractc: f16 = simd_extract!(c, 0);
5998 fmsub = fmaf16(a:fmsub, b:extractb, -extractc);
5999 }
6000 simd_insert!(a, 0, fmsub)
6001 }
6002}
6003
6004/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6005/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6006/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
6007/// upper elements of dst.
6008///
6009/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sh)
6010#[inline]
6011#[target_feature(enable = "avx512fp16")]
6012#[cfg_attr(test, assert_instr(vfmsub))]
6013#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6014pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6015 unsafe {
6016 let mut fmsub: f16 = simd_extract!(c, 0);
6017 if k & 1 != 0 {
6018 let extracta: f16 = simd_extract!(a, 0);
6019 let extractb: f16 = simd_extract!(b, 0);
6020 fmsub = fmaf16(a:extracta, b:extractb, -fmsub);
6021 }
6022 simd_insert!(c, 0, fmsub)
6023 }
6024}
6025
6026/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6027/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
6028/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6029/// upper elements of dst.
6030///
6031/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sh)
6032#[inline]
6033#[target_feature(enable = "avx512fp16")]
6034#[cfg_attr(test, assert_instr(vfmsub))]
6035#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6036pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6037 unsafe {
6038 let mut fmsub: f16 = 0.0;
6039 if k & 1 != 0 {
6040 let extracta: f16 = simd_extract!(a, 0);
6041 let extractb: f16 = simd_extract!(b, 0);
6042 let extractc: f16 = simd_extract!(c, 0);
6043 fmsub = fmaf16(a:extracta, b:extractb, -extractc);
6044 }
6045 simd_insert!(a, 0, fmsub)
6046 }
6047}
6048
6049/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6050/// in c from the intermediate result. Store the result in the lower element of dst, and copy the upper
6051/// 7 packed elements from a to the upper elements of dst.
6052///
6053/// Rounding is done according to the rounding parameter, which can be one of:
6054///
6055/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6056/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6057/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6058/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6059/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6060///
6061/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sh)
6062#[inline]
6063#[target_feature(enable = "avx512fp16")]
6064#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6065#[rustc_legacy_const_generics(3)]
6066#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6067pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6068 unsafe {
6069 static_assert_rounding!(ROUNDING);
6070 let extracta: f16 = simd_extract!(a, 0);
6071 let extractb: f16 = simd_extract!(b, 0);
6072 let extractc: f16 = simd_extract!(c, 0);
6073 let r: f16 = vfmaddsh(a:extracta, b:extractb, -extractc, ROUNDING);
6074 simd_insert!(a, 0, r)
6075 }
6076}
6077
6078/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6079/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6080/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6081/// upper elements of dst.
6082///
6083/// Rounding is done according to the rounding parameter, which can be one of:
6084///
6085/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6086/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6087/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6088/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6089/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6090///
6091/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sh)
6092#[inline]
6093#[target_feature(enable = "avx512fp16")]
6094#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6095#[rustc_legacy_const_generics(4)]
6096#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6097pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
6098 a: __m128h,
6099 k: __mmask8,
6100 b: __m128h,
6101 c: __m128h,
6102) -> __m128h {
6103 unsafe {
6104 static_assert_rounding!(ROUNDING);
6105 let mut fmsub: f16 = simd_extract!(a, 0);
6106 if k & 1 != 0 {
6107 let extractb: f16 = simd_extract!(b, 0);
6108 let extractc: f16 = simd_extract!(c, 0);
6109 fmsub = vfmaddsh(a:fmsub, b:extractb, -extractc, ROUNDING);
6110 }
6111 simd_insert!(a, 0, fmsub)
6112 }
6113}
6114
6115/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6116/// in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element
6117/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the
6118/// upper elements of dst.
6119///
6120/// Rounding is done according to the rounding parameter, which can be one of:
6121///
6122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6127///
6128/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sh)
6129#[inline]
6130#[target_feature(enable = "avx512fp16")]
6131#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6132#[rustc_legacy_const_generics(4)]
6133#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6134pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
6135 a: __m128h,
6136 b: __m128h,
6137 c: __m128h,
6138 k: __mmask8,
6139) -> __m128h {
6140 unsafe {
6141 static_assert_rounding!(ROUNDING);
6142 let mut fmsub: f16 = simd_extract!(c, 0);
6143 if k & 1 != 0 {
6144 let extracta: f16 = simd_extract!(a, 0);
6145 let extractb: f16 = simd_extract!(b, 0);
6146 fmsub = vfmaddsh(a:extracta, b:extractb, -fmsub, ROUNDING);
6147 }
6148 simd_insert!(c, 0, fmsub)
6149 }
6150}
6151
6152/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements
6153/// in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element
6154/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
6155/// upper elements of dst.
6156///
6157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sh)
6158#[inline]
6159#[target_feature(enable = "avx512fp16")]
6160#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
6161#[rustc_legacy_const_generics(4)]
6162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6163pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
6164 k: __mmask8,
6165 a: __m128h,
6166 b: __m128h,
6167 c: __m128h,
6168) -> __m128h {
6169 unsafe {
6170 static_assert_rounding!(ROUNDING);
6171 let mut fmsub: f16 = 0.0;
6172 if k & 1 != 0 {
6173 let extracta: f16 = simd_extract!(a, 0);
6174 let extractb: f16 = simd_extract!(b, 0);
6175 let extractc: f16 = simd_extract!(c, 0);
6176 fmsub = vfmaddsh(a:extracta, b:extractb, -extractc, ROUNDING);
6177 }
6178 simd_insert!(a, 0, fmsub)
6179 }
6180}
6181
6182/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6183/// result from packed elements in c, and store the results in dst.
6184///
6185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ph)
6186#[inline]
6187#[target_feature(enable = "avx512fp16,avx512vl")]
6188#[cfg_attr(test, assert_instr(vfnmadd))]
6189#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6190pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6191 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6192}
6193
6194/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6195/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6196/// from a when the corresponding mask bit is not set).
6197///
6198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ph)
6199#[inline]
6200#[target_feature(enable = "avx512fp16,avx512vl")]
6201#[cfg_attr(test, assert_instr(vfnmadd))]
6202#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6203pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6204 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:a) }
6205}
6206
6207/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6208/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6209/// from c when the corresponding mask bit is not set).
6210///
6211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ph)
6212#[inline]
6213#[target_feature(enable = "avx512fp16,avx512vl")]
6214#[cfg_attr(test, assert_instr(vfnmadd))]
6215#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6216pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6217 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:c) }
6218}
6219
6220/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6221/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6222/// out when the corresponding mask bit is not set).
6223///
6224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ph)
6225#[inline]
6226#[target_feature(enable = "avx512fp16,avx512vl")]
6227#[cfg_attr(test, assert_instr(vfnmadd))]
6228#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6229pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6230 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ph(a, b, c), no:_mm_setzero_ph()) }
6231}
6232
6233/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6234/// result from packed elements in c, and store the results in dst.
6235///
6236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ph)
6237#[inline]
6238#[target_feature(enable = "avx512fp16,avx512vl")]
6239#[cfg_attr(test, assert_instr(vfnmadd))]
6240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6241pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6242 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6243}
6244
6245/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6246/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6247/// from a when the corresponding mask bit is not set).
6248///
6249/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmadd_ph)
6250#[inline]
6251#[target_feature(enable = "avx512fp16,avx512vl")]
6252#[cfg_attr(test, assert_instr(vfnmadd))]
6253#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6254pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
6255 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ph(a, b, c), no:a) }
6256}
6257
6258/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6259/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6260/// from c when the corresponding mask bit is not set).
6261///
6262/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmadd_ph)
6263#[inline]
6264#[target_feature(enable = "avx512fp16,avx512vl")]
6265#[cfg_attr(test, assert_instr(vfnmadd))]
6266#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6267pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
6268 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ph(a, b, c), no:c) }
6269}
6270
6271/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6272/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6273/// out when the corresponding mask bit is not set).
6274///
6275/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmadd_ph)
6276#[inline]
6277#[target_feature(enable = "avx512fp16,avx512vl")]
6278#[cfg_attr(test, assert_instr(vfnmadd))]
6279#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6280pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6281 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ph(a, b, c), no:_mm256_setzero_ph()) }
6282}
6283
6284/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6285/// result from packed elements in c, and store the results in dst.
6286///
6287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ph)
6288#[inline]
6289#[target_feature(enable = "avx512fp16")]
6290#[cfg_attr(test, assert_instr(vfnmadd))]
6291#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6292pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6293 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
6294}
6295
6296/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6297/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6298/// from a when the corresponding mask bit is not set).
6299///
6300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ph)
6301#[inline]
6302#[target_feature(enable = "avx512fp16")]
6303#[cfg_attr(test, assert_instr(vfnmadd))]
6304#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6305pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
6306 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ph(a, b, c), no:a) }
6307}
6308
6309/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6310/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6311/// from c when the corresponding mask bit is not set).
6312///
6313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ph)
6314#[inline]
6315#[target_feature(enable = "avx512fp16")]
6316#[cfg_attr(test, assert_instr(vfnmadd))]
6317#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6318pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
6319 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ph(a, b, c), no:c) }
6320}
6321
6322/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6323/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6324/// out when the corresponding mask bit is not set).
6325///
6326/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ph)
6327#[inline]
6328#[target_feature(enable = "avx512fp16")]
6329#[cfg_attr(test, assert_instr(vfnmadd))]
6330#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6331pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6332 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ph(a, b, c), no:_mm512_setzero_ph()) }
6333}
6334
6335/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6336/// result from packed elements in c, and store the results in dst.
6337///
6338/// Rounding is done according to the rounding parameter, which can be one of:
6339///
6340/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6341/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6342/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6343/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6344/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6345///
6346/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ph)
6347#[inline]
6348#[target_feature(enable = "avx512fp16")]
6349#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6350#[rustc_legacy_const_generics(3)]
6351#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6352pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6353 unsafe {
6354 static_assert_rounding!(ROUNDING);
6355 vfmaddph_512(a:simd_neg(a), b, c, ROUNDING)
6356 }
6357}
6358
6359/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6360/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6361/// from a when the corresponding mask bit is not set).
6362///
6363/// Rounding is done according to the rounding parameter, which can be one of:
6364///
6365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6370///
6371/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ph)
6372#[inline]
6373#[target_feature(enable = "avx512fp16")]
6374#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6375#[rustc_legacy_const_generics(4)]
6376#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6377pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
6378 a: __m512h,
6379 k: __mmask32,
6380 b: __m512h,
6381 c: __m512h,
6382) -> __m512h {
6383 unsafe {
6384 static_assert_rounding!(ROUNDING);
6385 simd_select_bitmask(m:k, yes:_mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), no:a)
6386 }
6387}
6388
6389/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6390/// result from packed elements in c, and store the results in dst using writemask k (the element is copied
6391/// from c when the corresponding mask bit is not set).
6392///
6393/// Rounding is done according to the rounding parameter, which can be one of:
6394///
6395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6400///
6401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ph)
6402#[inline]
6403#[target_feature(enable = "avx512fp16")]
6404#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6405#[rustc_legacy_const_generics(4)]
6406#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6407pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
6408 a: __m512h,
6409 b: __m512h,
6410 c: __m512h,
6411 k: __mmask32,
6412) -> __m512h {
6413 unsafe {
6414 static_assert_rounding!(ROUNDING);
6415 simd_select_bitmask(m:k, yes:_mm512_fnmadd_round_ph::<ROUNDING>(a, b, c), no:c)
6416 }
6417}
6418
6419/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate
6420/// result from packed elements in c, and store the results in dst using zeromask k (the element is zeroed
6421/// out when the corresponding mask bit is not set).
6422///
6423/// Rounding is done according to the rounding parameter, which can be one of:
6424///
6425/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6426/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6427/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6428/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6429/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6430///
6431/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ph)
6432#[inline]
6433#[target_feature(enable = "avx512fp16")]
6434#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6435#[rustc_legacy_const_generics(4)]
6436#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6437pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
6438 k: __mmask32,
6439 a: __m512h,
6440 b: __m512h,
6441 c: __m512h,
6442) -> __m512h {
6443 unsafe {
6444 static_assert_rounding!(ROUNDING);
6445 simd_select_bitmask(
6446 m:k,
6447 yes:_mm512_fnmadd_round_ph::<ROUNDING>(a, b, c),
6448 no:_mm512_setzero_ph(),
6449 )
6450 }
6451}
6452
6453/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6454/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6455/// elements from a to the upper elements of dst.
6456///
6457/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sh)
6458#[inline]
6459#[target_feature(enable = "avx512fp16")]
6460#[cfg_attr(test, assert_instr(vfnmadd))]
6461#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6462pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6463 unsafe {
6464 let extracta: f16 = simd_extract!(a, 0);
6465 let extractb: f16 = simd_extract!(b, 0);
6466 let extractc: f16 = simd_extract!(c, 0);
6467 let r: f16 = fmaf16(-extracta, b:extractb, c:extractc);
6468 simd_insert!(a, 0, r)
6469 }
6470}
6471
6472/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6473/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6474/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6475/// elements of dst.
6476///
6477/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sh)
6478#[inline]
6479#[target_feature(enable = "avx512fp16")]
6480#[cfg_attr(test, assert_instr(vfnmadd))]
6481#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6482pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6483 unsafe {
6484 let mut fnmadd: f16 = simd_extract!(a, 0);
6485 if k & 1 != 0 {
6486 let extractb: f16 = simd_extract!(b, 0);
6487 let extractc: f16 = simd_extract!(c, 0);
6488 fnmadd = fmaf16(-fnmadd, b:extractb, c:extractc);
6489 }
6490 simd_insert!(a, 0, fnmadd)
6491 }
6492}
6493
6494/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6495/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6496/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6497/// elements of dst.
6498///
6499/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sh)
6500#[inline]
6501#[target_feature(enable = "avx512fp16")]
6502#[cfg_attr(test, assert_instr(vfnmadd))]
6503#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6504pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6505 unsafe {
6506 let mut fnmadd: f16 = simd_extract!(c, 0);
6507 if k & 1 != 0 {
6508 let extracta: f16 = simd_extract!(a, 0);
6509 let extractb: f16 = simd_extract!(b, 0);
6510 fnmadd = fmaf16(-extracta, b:extractb, c:fnmadd);
6511 }
6512 simd_insert!(c, 0, fnmadd)
6513 }
6514}
6515
6516/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6517/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6518/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6519/// elements of dst.
6520///
6521/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sh)
6522#[inline]
6523#[target_feature(enable = "avx512fp16")]
6524#[cfg_attr(test, assert_instr(vfnmadd))]
6525#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6526pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6527 unsafe {
6528 let mut fnmadd: f16 = 0.0;
6529 if k & 1 != 0 {
6530 let extracta: f16 = simd_extract!(a, 0);
6531 let extractb: f16 = simd_extract!(b, 0);
6532 let extractc: f16 = simd_extract!(c, 0);
6533 fnmadd = fmaf16(-extracta, b:extractb, c:extractc);
6534 }
6535 simd_insert!(a, 0, fnmadd)
6536 }
6537}
6538
6539/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6540/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6541/// elements from a to the upper elements of dst.
6542///
6543/// Rounding is done according to the rounding parameter, which can be one of:
6544///
6545/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6546/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6547/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6548/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6549/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6550///
6551/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sh)
6552#[inline]
6553#[target_feature(enable = "avx512fp16")]
6554#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6555#[rustc_legacy_const_generics(3)]
6556#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6557pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6558 unsafe {
6559 static_assert_rounding!(ROUNDING);
6560 let extracta: f16 = simd_extract!(a, 0);
6561 let extractb: f16 = simd_extract!(b, 0);
6562 let extractc: f16 = simd_extract!(c, 0);
6563 let r: f16 = vfmaddsh(-extracta, b:extractb, c:extractc, ROUNDING);
6564 simd_insert!(a, 0, r)
6565 }
6566}
6567
6568/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6569/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6570/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6571/// elements of dst.
6572///
6573/// Rounding is done according to the rounding parameter, which can be one of:
6574///
6575/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6576/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6577/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6578/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6579/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6580///
6581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sh)
6582#[inline]
6583#[target_feature(enable = "avx512fp16")]
6584#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6585#[rustc_legacy_const_generics(4)]
6586#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6587pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
6588 a: __m128h,
6589 k: __mmask8,
6590 b: __m128h,
6591 c: __m128h,
6592) -> __m128h {
6593 unsafe {
6594 static_assert_rounding!(ROUNDING);
6595 let mut fnmadd: f16 = simd_extract!(a, 0);
6596 if k & 1 != 0 {
6597 let extractb: f16 = simd_extract!(b, 0);
6598 let extractc: f16 = simd_extract!(c, 0);
6599 fnmadd = vfmaddsh(-fnmadd, b:extractb, c:extractc, ROUNDING);
6600 }
6601 simd_insert!(a, 0, fnmadd)
6602 }
6603}
6604
6605/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6606/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6607/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6608/// elements of dst.
6609///
6610/// Rounding is done according to the rounding parameter, which can be one of:
6611///
6612/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6613/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6614/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6615/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6616/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6617///
6618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sh)
6619#[inline]
6620#[target_feature(enable = "avx512fp16")]
6621#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6622#[rustc_legacy_const_generics(4)]
6623#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6624pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
6625 a: __m128h,
6626 b: __m128h,
6627 c: __m128h,
6628 k: __mmask8,
6629) -> __m128h {
6630 unsafe {
6631 static_assert_rounding!(ROUNDING);
6632 let mut fnmadd: f16 = simd_extract!(c, 0);
6633 if k & 1 != 0 {
6634 let extracta: f16 = simd_extract!(a, 0);
6635 let extractb: f16 = simd_extract!(b, 0);
6636 fnmadd = vfmaddsh(-extracta, b:extractb, c:fnmadd, ROUNDING);
6637 }
6638 simd_insert!(c, 0, fnmadd)
6639 }
6640}
6641
6642/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6643/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
6644/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6645/// elements of dst.
6646///
6647/// Rounding is done according to the rounding parameter, which can be one of:
6648///
6649/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6650/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6651/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6652/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6654///
6655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sh)
6656#[inline]
6657#[target_feature(enable = "avx512fp16")]
6658#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
6659#[rustc_legacy_const_generics(4)]
6660#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6661pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
6662 k: __mmask8,
6663 a: __m128h,
6664 b: __m128h,
6665 c: __m128h,
6666) -> __m128h {
6667 unsafe {
6668 static_assert_rounding!(ROUNDING);
6669 let mut fnmadd: f16 = 0.0;
6670 if k & 1 != 0 {
6671 let extracta: f16 = simd_extract!(a, 0);
6672 let extractb: f16 = simd_extract!(b, 0);
6673 let extractc: f16 = simd_extract!(c, 0);
6674 fnmadd = vfmaddsh(-extracta, b:extractb, c:extractc, ROUNDING);
6675 }
6676 simd_insert!(a, 0, fnmadd)
6677 }
6678}
6679
6680/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6681/// in c from the negated intermediate result, and store the results in dst.
6682///
6683/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ph)
6684#[inline]
6685#[target_feature(enable = "avx512fp16,avx512vl")]
6686#[cfg_attr(test, assert_instr(vfnmsub))]
6687#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6688pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6689 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
6690}
6691
6692/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6693/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6694/// copied from a when the corresponding mask bit is not set).
6695///
6696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ph)
6697#[inline]
6698#[target_feature(enable = "avx512fp16,avx512vl")]
6699#[cfg_attr(test, assert_instr(vfnmsub))]
6700#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6701pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6702 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ph(a, b, c), no:a) }
6703}
6704
6705/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6706/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6707/// copied from c when the corresponding mask bit is not set).
6708///
6709/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ph)
6710#[inline]
6711#[target_feature(enable = "avx512fp16,avx512vl")]
6712#[cfg_attr(test, assert_instr(vfnmsub))]
6713#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6714pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
6715 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ph(a, b, c), no:c) }
6716}
6717
6718/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6719/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6720/// zeroed out when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ph)
6723#[inline]
6724#[target_feature(enable = "avx512fp16,avx512vl")]
6725#[cfg_attr(test, assert_instr(vfnmsub))]
6726#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6727pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6728 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ph(a, b, c), no:_mm_setzero_ph()) }
6729}
6730
6731/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6732/// in c from the negated intermediate result, and store the results in dst.
6733///
6734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ph)
6735#[inline]
6736#[target_feature(enable = "avx512fp16,avx512vl")]
6737#[cfg_attr(test, assert_instr(vfnmsub))]
6738#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6739pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6740 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
6741}
6742
6743/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6744/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6745/// copied from a when the corresponding mask bit is not set).
6746///
6747/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fnmsub_ph)
6748#[inline]
6749#[target_feature(enable = "avx512fp16,avx512vl")]
6750#[cfg_attr(test, assert_instr(vfnmsub))]
6751#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6752pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
6753 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ph(a, b, c), no:a) }
6754}
6755
6756/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6757/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6758/// copied from c when the corresponding mask bit is not set).
6759///
6760/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fnmsub_ph)
6761#[inline]
6762#[target_feature(enable = "avx512fp16,avx512vl")]
6763#[cfg_attr(test, assert_instr(vfnmsub))]
6764#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6765pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
6766 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ph(a, b, c), no:c) }
6767}
6768
6769/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6770/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6771/// zeroed out when the corresponding mask bit is not set).
6772///
6773/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fnmsub_ph)
6774#[inline]
6775#[target_feature(enable = "avx512fp16,avx512vl")]
6776#[cfg_attr(test, assert_instr(vfnmsub))]
6777#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6778pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
6779 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ph(a, b, c), no:_mm256_setzero_ph()) }
6780}
6781
6782/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6783/// in c from the negated intermediate result, and store the results in dst.
6784///
6785/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ph)
6786#[inline]
6787#[target_feature(enable = "avx512fp16")]
6788#[cfg_attr(test, assert_instr(vfnmsub))]
6789#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6790pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6791 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
6792}
6793
6794/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6795/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6796/// copied from a when the corresponding mask bit is not set).
6797///
6798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ph)
6799#[inline]
6800#[target_feature(enable = "avx512fp16")]
6801#[cfg_attr(test, assert_instr(vfnmsub))]
6802#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6803pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
6804 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ph(a, b, c), no:a) }
6805}
6806
6807/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6808/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6809/// copied from c when the corresponding mask bit is not set).
6810///
6811/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ph)
6812#[inline]
6813#[target_feature(enable = "avx512fp16")]
6814#[cfg_attr(test, assert_instr(vfnmsub))]
6815#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6816pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
6817 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ph(a, b, c), no:c) }
6818}
6819
6820/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6821/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6822/// zeroed out when the corresponding mask bit is not set).
6823///
6824/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ph)
6825#[inline]
6826#[target_feature(enable = "avx512fp16")]
6827#[cfg_attr(test, assert_instr(vfnmsub))]
6828#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6829pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6830 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ph(a, b, c), no:_mm512_setzero_ph()) }
6831}
6832
6833/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6834/// in c from the negated intermediate result, and store the results in dst.
6835///
6836/// Rounding is done according to the rounding parameter, which can be one of:
6837///
6838/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6839/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6840/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6841/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6842/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6843///
6844/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ph)
6845#[inline]
6846#[target_feature(enable = "avx512fp16")]
6847#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6848#[rustc_legacy_const_generics(3)]
6849#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6850pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
6851 unsafe {
6852 static_assert_rounding!(ROUNDING);
6853 vfmaddph_512(a:simd_neg(a), b, c:simd_neg(c), ROUNDING)
6854 }
6855}
6856
6857/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6858/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6859/// copied from a when the corresponding mask bit is not set).
6860///
6861/// Rounding is done according to the rounding parameter, which can be one of:
6862///
6863/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6864/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6865/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6866/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6867/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6868///
6869/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ph)
6870#[inline]
6871#[target_feature(enable = "avx512fp16")]
6872#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6873#[rustc_legacy_const_generics(4)]
6874#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6875pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
6876 a: __m512h,
6877 k: __mmask32,
6878 b: __m512h,
6879 c: __m512h,
6880) -> __m512h {
6881 unsafe {
6882 static_assert_rounding!(ROUNDING);
6883 simd_select_bitmask(m:k, yes:_mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), no:a)
6884 }
6885}
6886
6887/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6888/// in c from the negated intermediate result, and store the results in dst using writemask k (the element is
6889/// copied from c when the corresponding mask bit is not set).
6890///
6891/// Rounding is done according to the rounding parameter, which can be one of:
6892///
6893/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6894/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6895/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6896/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6897/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6898///
6899/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ph)
6900#[inline]
6901#[target_feature(enable = "avx512fp16")]
6902#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6903#[rustc_legacy_const_generics(4)]
6904#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6905pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
6906 a: __m512h,
6907 b: __m512h,
6908 c: __m512h,
6909 k: __mmask32,
6910) -> __m512h {
6911 unsafe {
6912 static_assert_rounding!(ROUNDING);
6913 simd_select_bitmask(m:k, yes:_mm512_fnmsub_round_ph::<ROUNDING>(a, b, c), no:c)
6914 }
6915}
6916
6917/// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements
6918/// in c from the negated intermediate result, and store the results in dst using zeromask k (the element is
6919/// zeroed out when the corresponding mask bit is not set).
6920///
6921/// Rounding is done according to the rounding parameter, which can be one of:
6922///
6923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
6924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
6925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
6926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
6927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6928///
6929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ph)
6930#[inline]
6931#[target_feature(enable = "avx512fp16")]
6932#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
6933#[rustc_legacy_const_generics(4)]
6934#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6935pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
6936 k: __mmask32,
6937 a: __m512h,
6938 b: __m512h,
6939 c: __m512h,
6940) -> __m512h {
6941 unsafe {
6942 static_assert_rounding!(ROUNDING);
6943 simd_select_bitmask(
6944 m:k,
6945 yes:_mm512_fnmsub_round_ph::<ROUNDING>(a, b, c),
6946 no:_mm512_setzero_ph(),
6947 )
6948 }
6949}
6950
6951/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6952/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
6953/// elements from a to the upper elements of dst.
6954///
6955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sh)
6956#[inline]
6957#[target_feature(enable = "avx512fp16")]
6958#[cfg_attr(test, assert_instr(vfnmsub))]
6959#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6960pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
6961 unsafe {
6962 let extracta: f16 = simd_extract!(a, 0);
6963 let extractb: f16 = simd_extract!(b, 0);
6964 let extractc: f16 = simd_extract!(c, 0);
6965 let r: f16 = fmaf16(-extracta, b:extractb, -extractc);
6966 simd_insert!(a, 0, r)
6967 }
6968}
6969
6970/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6971/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6972/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
6973/// elements of dst.
6974///
6975/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sh)
6976#[inline]
6977#[target_feature(enable = "avx512fp16")]
6978#[cfg_attr(test, assert_instr(vfnmsub))]
6979#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
6980pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
6981 unsafe {
6982 let mut fnmsub: f16 = simd_extract!(a, 0);
6983 if k & 1 != 0 {
6984 let extractb: f16 = simd_extract!(b, 0);
6985 let extractc: f16 = simd_extract!(c, 0);
6986 fnmsub = fmaf16(-fnmsub, b:extractb, -extractc);
6987 }
6988 simd_insert!(a, 0, fnmsub)
6989 }
6990}
6991
6992/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
6993/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
6994/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
6995/// elements of dst.
6996///
6997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sh)
6998#[inline]
6999#[target_feature(enable = "avx512fp16")]
7000#[cfg_attr(test, assert_instr(vfnmsub))]
7001#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7002pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
7003 unsafe {
7004 let mut fnmsub: f16 = simd_extract!(c, 0);
7005 if k & 1 != 0 {
7006 let extracta: f16 = simd_extract!(a, 0);
7007 let extractb: f16 = simd_extract!(b, 0);
7008 fnmsub = fmaf16(-extracta, b:extractb, -fnmsub);
7009 }
7010 simd_insert!(c, 0, fnmsub)
7011 }
7012}
7013
7014/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7015/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
7016/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7017/// elements of dst.
7018///
7019/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sh)
7020#[inline]
7021#[target_feature(enable = "avx512fp16")]
7022#[cfg_attr(test, assert_instr(vfnmsub))]
7023#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7024pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7025 unsafe {
7026 let mut fnmsub: f16 = 0.0;
7027 if k & 1 != 0 {
7028 let extracta: f16 = simd_extract!(a, 0);
7029 let extractb: f16 = simd_extract!(b, 0);
7030 let extractc: f16 = simd_extract!(c, 0);
7031 fnmsub = fmaf16(-extracta, b:extractb, -extractc);
7032 }
7033 simd_insert!(a, 0, fnmsub)
7034 }
7035}
7036
7037/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7038/// result from the lower element in c. Store the result in the lower element of dst, and copy the upper 7 packed
7039/// elements from a to the upper elements of dst.
7040///
7041/// Rounding is done according to the rounding parameter, which can be one of:
7042///
7043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7048///
7049/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sh)
7050#[inline]
7051#[target_feature(enable = "avx512fp16")]
7052#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7053#[rustc_legacy_const_generics(3)]
7054#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7055pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7056 unsafe {
7057 static_assert_rounding!(ROUNDING);
7058 let extracta: f16 = simd_extract!(a, 0);
7059 let extractb: f16 = simd_extract!(b, 0);
7060 let extractc: f16 = simd_extract!(c, 0);
7061 let r: f16 = vfmaddsh(-extracta, b:extractb, -extractc, ROUNDING);
7062 simd_insert!(a, 0, r)
7063 }
7064}
7065
7066/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7067/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
7068/// is copied from a when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7069/// elements of dst.
7070///
7071/// Rounding is done according to the rounding parameter, which can be one of:
7072///
7073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7078///
7079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sh)
7080#[inline]
7081#[target_feature(enable = "avx512fp16")]
7082#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7083#[rustc_legacy_const_generics(4)]
7084#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7085pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
7086 a: __m128h,
7087 k: __mmask8,
7088 b: __m128h,
7089 c: __m128h,
7090) -> __m128h {
7091 unsafe {
7092 static_assert_rounding!(ROUNDING);
7093 let mut fnmsub: f16 = simd_extract!(a, 0);
7094 if k & 1 != 0 {
7095 let extractb: f16 = simd_extract!(b, 0);
7096 let extractc: f16 = simd_extract!(c, 0);
7097 fnmsub = vfmaddsh(-fnmsub, b:extractb, -extractc, ROUNDING);
7098 }
7099 simd_insert!(a, 0, fnmsub)
7100 }
7101}
7102
7103/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7104/// result from the lower element in c. Store the result in the lower element of dst using writemask k (the element
7105/// is copied from c when the mask bit 0 is not set), and copy the upper 7 packed elements from c to the upper
7106/// elements of dst.
7107///
7108/// Rounding is done according to the rounding parameter, which can be one of:
7109///
7110/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7111/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7112/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7113/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7114/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7115///
7116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sh)
7117#[inline]
7118#[target_feature(enable = "avx512fp16")]
7119#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7120#[rustc_legacy_const_generics(4)]
7121#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7122pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
7123 a: __m128h,
7124 b: __m128h,
7125 c: __m128h,
7126 k: __mmask8,
7127) -> __m128h {
7128 unsafe {
7129 static_assert_rounding!(ROUNDING);
7130 let mut fnmsub: f16 = simd_extract!(c, 0);
7131 if k & 1 != 0 {
7132 let extracta: f16 = simd_extract!(a, 0);
7133 let extractb: f16 = simd_extract!(b, 0);
7134 fnmsub = vfmaddsh(-extracta, b:extractb, -fnmsub, ROUNDING);
7135 }
7136 simd_insert!(c, 0, fnmsub)
7137 }
7138}
7139
7140/// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate
7141/// result from the lower element in c. Store the result in the lower element of dst using zeromask k (the element
7142/// is zeroed out when the mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
7143/// elements of dst.
7144///
7145/// Rounding is done according to the rounding parameter, which can be one of:
7146///
7147/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7148/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7149/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7150/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7151/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7152///
7153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sh)
7154#[inline]
7155#[target_feature(enable = "avx512fp16")]
7156#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
7157#[rustc_legacy_const_generics(4)]
7158#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7159pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
7160 k: __mmask8,
7161 a: __m128h,
7162 b: __m128h,
7163 c: __m128h,
7164) -> __m128h {
7165 unsafe {
7166 static_assert_rounding!(ROUNDING);
7167 let mut fnmsub: f16 = 0.0;
7168 if k & 1 != 0 {
7169 let extracta: f16 = simd_extract!(a, 0);
7170 let extractb: f16 = simd_extract!(b, 0);
7171 let extractc: f16 = simd_extract!(c, 0);
7172 fnmsub = vfmaddsh(-extracta, b:extractb, -extractc, ROUNDING);
7173 }
7174 simd_insert!(a, 0, fnmsub)
7175 }
7176}
7177
7178/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7179/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7180///
7181/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ph)
7182#[inline]
7183#[target_feature(enable = "avx512fp16,avx512vl")]
7184#[cfg_attr(test, assert_instr(vfmaddsub))]
7185#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7186pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7187 unsafe {
7188 let add: __m128h = simd_fma(x:a, y:b, z:c);
7189 let sub: __m128h = simd_fma(x:a, y:b, z:simd_neg(c));
7190 simd_shuffle!(sub, add, [0, 9, 2, 11, 4, 13, 6, 15])
7191 }
7192}
7193
7194/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7195/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7196/// (the element is copied from a when the corresponding mask bit is not set).
7197///
7198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmaddsub_ph)
7199#[inline]
7200#[target_feature(enable = "avx512fp16,avx512vl")]
7201#[cfg_attr(test, assert_instr(vfmaddsub))]
7202#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7203pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
7204 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ph(a, b, c), no:a) }
7205}
7206
7207/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7208/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7209/// (the element is copied from c when the corresponding mask bit is not set).
7210///
7211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmaddsub_ph)
7212#[inline]
7213#[target_feature(enable = "avx512fp16,avx512vl")]
7214#[cfg_attr(test, assert_instr(vfmaddsub))]
7215#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7216pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
7217 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ph(a, b, c), no:c) }
7218}
7219
7220/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7221/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7222/// (the element is zeroed out when the corresponding mask bit is not set).
7223///
7224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ph)
7225#[inline]
7226#[target_feature(enable = "avx512fp16,avx512vl")]
7227#[cfg_attr(test, assert_instr(vfmaddsub))]
7228#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7229pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7230 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ph(a, b, c), no:_mm_setzero_ph()) }
7231}
7232
7233/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7234/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7235///
7236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ph)
7237#[inline]
7238#[target_feature(enable = "avx512fp16,avx512vl")]
7239#[cfg_attr(test, assert_instr(vfmaddsub))]
7240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7241pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7242 unsafe {
7243 let add: __m256h = simd_fma(x:a, y:b, z:c);
7244 let sub: __m256h = simd_fma(x:a, y:b, z:simd_neg(c));
7245 simd_shuffle!(
7246 sub,
7247 add,
7248 [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
7249 )
7250 }
7251}
7252
7253/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7254/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7255/// (the element is copied from a when the corresponding mask bit is not set).
7256///
7257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmaddsub_ph)
7258#[inline]
7259#[target_feature(enable = "avx512fp16,avx512vl")]
7260#[cfg_attr(test, assert_instr(vfmaddsub))]
7261#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7262pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
7263 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ph(a, b, c), no:a) }
7264}
7265
7266/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7267/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7268/// (the element is copied from c when the corresponding mask bit is not set).
7269///
7270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmaddsub_ph)
7271#[inline]
7272#[target_feature(enable = "avx512fp16,avx512vl")]
7273#[cfg_attr(test, assert_instr(vfmaddsub))]
7274#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7275pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
7276 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ph(a, b, c), no:c) }
7277}
7278
7279/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7280/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7281/// (the element is zeroed out when the corresponding mask bit is not set).
7282///
7283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmaddsub_ph)
7284#[inline]
7285#[target_feature(enable = "avx512fp16,avx512vl")]
7286#[cfg_attr(test, assert_instr(vfmaddsub))]
7287#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7288pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7289 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ph(a, b, c), no:_mm256_setzero_ph()) }
7290}
7291
7292/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7293/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
7294///
7295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ph)
7296#[inline]
7297#[target_feature(enable = "avx512fp16")]
7298#[cfg_attr(test, assert_instr(vfmaddsub))]
7299#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7300pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7301 unsafe {
7302 let add: __m512h = simd_fma(x:a, y:b, z:c);
7303 let sub: __m512h = simd_fma(x:a, y:b, z:simd_neg(c));
7304 simd_shuffle!(
7305 sub,
7306 add,
7307 [
7308 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, 20, 53,
7309 22, 55, 24, 57, 26, 59, 28, 61, 30, 63
7310 ]
7311 )
7312 }
7313}
7314
7315/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7316/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7317/// (the element is copied from a when the corresponding mask bit is not set).
7318///
7319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ph)
7320#[inline]
7321#[target_feature(enable = "avx512fp16")]
7322#[cfg_attr(test, assert_instr(vfmaddsub))]
7323#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7324pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
7325 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ph(a, b, c), no:a) }
7326}
7327
7328/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7329/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7330/// (the element is copied from c when the corresponding mask bit is not set).
7331///
7332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ph)
7333#[inline]
7334#[target_feature(enable = "avx512fp16")]
7335#[cfg_attr(test, assert_instr(vfmaddsub))]
7336#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7337pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
7338 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ph(a, b, c), no:c) }
7339}
7340
7341/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7342/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7343/// (the element is zeroed out when the corresponding mask bit is not set).
7344///
7345/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ph)
7346#[inline]
7347#[target_feature(enable = "avx512fp16")]
7348#[cfg_attr(test, assert_instr(vfmaddsub))]
7349#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7350pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7351 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ph(a, b, c), no:_mm512_setzero_ph()) }
7352}
7353
/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
/// subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
    a: __m512h,
    b: __m512h,
    c: __m512h,
) -> __m512h {
    unsafe {
        // Reject invalid rounding-control values at compile time, then forward
        // directly to the rounding-aware intrinsic.
        static_assert_rounding!(ROUNDING);
        vfmaddsubph_512(a, b, c, ROUNDING)
    }
}
7381
7382/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7383/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7384/// (the element is copied from a when the corresponding mask bit is not set).
7385///
7386/// Rounding is done according to the rounding parameter, which can be one of:
7387///
7388/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7389/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7390/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7391/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7392/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7393///
7394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ph)
7395#[inline]
7396#[target_feature(enable = "avx512fp16")]
7397#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7398#[rustc_legacy_const_generics(4)]
7399#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7400pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
7401 a: __m512h,
7402 k: __mmask32,
7403 b: __m512h,
7404 c: __m512h,
7405) -> __m512h {
7406 unsafe {
7407 static_assert_rounding!(ROUNDING);
7408 simd_select_bitmask(m:k, yes:_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), no:a)
7409 }
7410}
7411
7412/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7413/// subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7414/// (the element is copied from c when the corresponding mask bit is not set).
7415///
7416/// Rounding is done according to the rounding parameter, which can be one of:
7417///
7418/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7419/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7420/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7421/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7422/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7423///
7424/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ph)
7425#[inline]
7426#[target_feature(enable = "avx512fp16")]
7427#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7428#[rustc_legacy_const_generics(4)]
7429#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7430pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
7431 a: __m512h,
7432 b: __m512h,
7433 c: __m512h,
7434 k: __mmask32,
7435) -> __m512h {
7436 unsafe {
7437 static_assert_rounding!(ROUNDING);
7438 simd_select_bitmask(m:k, yes:_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c), no:c)
7439 }
7440}
7441
7442/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and
7443/// subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7444/// (the element is zeroed out when the corresponding mask bit is not set).
7445///
7446/// Rounding is done according to the rounding parameter, which can be one of:
7447///
7448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7453///
7454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ph)
7455#[inline]
7456#[target_feature(enable = "avx512fp16")]
7457#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
7458#[rustc_legacy_const_generics(4)]
7459#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7460pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
7461 k: __mmask32,
7462 a: __m512h,
7463 b: __m512h,
7464 c: __m512h,
7465) -> __m512h {
7466 unsafe {
7467 static_assert_rounding!(ROUNDING);
7468 simd_select_bitmask(
7469 m:k,
7470 yes:_mm512_fmaddsub_round_ph::<ROUNDING>(a, b, c),
7471 no:_mm512_setzero_ph(),
7472 )
7473 }
7474}
7475
7476/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7477/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7478///
7479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ph)
7480#[inline]
7481#[target_feature(enable = "avx512fp16,avx512vl")]
7482#[cfg_attr(test, assert_instr(vfmsubadd))]
7483#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7484pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7485 _mm_fmaddsub_ph(a, b, c:unsafe { simd_neg(c) })
7486}
7487
7488/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7489/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7490/// (the element is copied from a when the corresponding mask bit is not set).
7491///
7492/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsubadd_ph)
7493#[inline]
7494#[target_feature(enable = "avx512fp16,avx512vl")]
7495#[cfg_attr(test, assert_instr(vfmsubadd))]
7496#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7497pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
7498 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ph(a, b, c), no:a) }
7499}
7500
7501/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7502/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7503/// (the element is copied from c when the corresponding mask bit is not set).
7504///
7505/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsubadd_ph)
7506#[inline]
7507#[target_feature(enable = "avx512fp16,avx512vl")]
7508#[cfg_attr(test, assert_instr(vfmsubadd))]
7509#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7510pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
7511 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ph(a, b, c), no:c) }
7512}
7513
7514/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7515/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7516/// (the element is zeroed out when the corresponding mask bit is not set).
7517///
7518/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsubadd_ph)
7519#[inline]
7520#[target_feature(enable = "avx512fp16,avx512vl")]
7521#[cfg_attr(test, assert_instr(vfmsubadd))]
7522#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7523pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
7524 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ph(a, b, c), no:_mm_setzero_ph()) }
7525}
7526
7527/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7528/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7529///
7530/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ph)
7531#[inline]
7532#[target_feature(enable = "avx512fp16,avx512vl")]
7533#[cfg_attr(test, assert_instr(vfmsubadd))]
7534#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7535pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7536 _mm256_fmaddsub_ph(a, b, c:unsafe { simd_neg(c) })
7537}
7538
7539/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7540/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7541/// (the element is copied from a when the corresponding mask bit is not set).
7542///
7543/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fmsubadd_ph)
7544#[inline]
7545#[target_feature(enable = "avx512fp16,avx512vl")]
7546#[cfg_attr(test, assert_instr(vfmsubadd))]
7547#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7548pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
7549 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ph(a, b, c), no:a) }
7550}
7551
7552/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7553/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7554/// (the element is copied from c when the corresponding mask bit is not set).
7555///
7556/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask3_fmsubadd_ph)
7557#[inline]
7558#[target_feature(enable = "avx512fp16,avx512vl")]
7559#[cfg_attr(test, assert_instr(vfmsubadd))]
7560#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7561pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
7562 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ph(a, b, c), no:c) }
7563}
7564
7565/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7566/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7567/// (the element is zeroed out when the corresponding mask bit is not set).
7568///
7569/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fmsubadd_ph)
7570#[inline]
7571#[target_feature(enable = "avx512fp16,avx512vl")]
7572#[cfg_attr(test, assert_instr(vfmsubadd))]
7573#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7574pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
7575 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ph(a, b, c), no:_mm256_setzero_ph()) }
7576}
7577
7578/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7579/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7580///
7581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ph)
7582#[inline]
7583#[target_feature(enable = "avx512fp16")]
7584#[cfg_attr(test, assert_instr(vfmsubadd))]
7585#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7586pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7587 _mm512_fmaddsub_ph(a, b, c:unsafe { simd_neg(c) })
7588}
7589
7590/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7591/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7592/// (the element is copied from a when the corresponding mask bit is not set).
7593///
7594/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ph)
7595#[inline]
7596#[target_feature(enable = "avx512fp16")]
7597#[cfg_attr(test, assert_instr(vfmsubadd))]
7598#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7599pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
7600 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ph(a, b, c), no:a) }
7601}
7602
7603/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7604/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7605/// (the element is copied from c when the corresponding mask bit is not set).
7606///
7607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ph)
7608#[inline]
7609#[target_feature(enable = "avx512fp16")]
7610#[cfg_attr(test, assert_instr(vfmsubadd))]
7611#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7612pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
7613 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ph(a, b, c), no:c) }
7614}
7615
7616/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7617/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7618/// (the element is zeroed out when the corresponding mask bit is not set).
7619///
7620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ph)
7621#[inline]
7622#[target_feature(enable = "avx512fp16")]
7623#[cfg_attr(test, assert_instr(vfmsubadd))]
7624#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7625pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
7626 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ph(a, b, c), no:_mm512_setzero_ph()) }
7627}
7628
7629/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7630/// and add packed elements in c to/from the intermediate result, and store the results in dst.
7631///
7632/// Rounding is done according to the rounding parameter, which can be one of:
7633///
7634/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7635/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7636/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7637/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7638/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7639///
7640/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ph)
7641#[inline]
7642#[target_feature(enable = "avx512fp16")]
7643#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7644#[rustc_legacy_const_generics(3)]
7645#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7646pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
7647 a: __m512h,
7648 b: __m512h,
7649 c: __m512h,
7650) -> __m512h {
7651 unsafe {
7652 static_assert_rounding!(ROUNDING);
7653 vfmaddsubph_512(a, b, c:simd_neg(c), ROUNDING)
7654 }
7655}
7656
7657/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7658/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7659/// (the element is copied from a when the corresponding mask bit is not set).
7660///
7661/// Rounding is done according to the rounding parameter, which can be one of:
7662///
7663/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7664/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7665/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7666/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7667/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7668///
7669/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ph)
7670#[inline]
7671#[target_feature(enable = "avx512fp16")]
7672#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7673#[rustc_legacy_const_generics(4)]
7674#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7675pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
7676 a: __m512h,
7677 k: __mmask32,
7678 b: __m512h,
7679 c: __m512h,
7680) -> __m512h {
7681 unsafe {
7682 static_assert_rounding!(ROUNDING);
7683 simd_select_bitmask(m:k, yes:_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), no:a)
7684 }
7685}
7686
7687/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7688/// and add packed elements in c to/from the intermediate result, and store the results in dst using writemask k
7689/// (the element is copied from c when the corresponding mask bit is not set).
7690///
7691/// Rounding is done according to the rounding parameter, which can be one of:
7692///
7693/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7694/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7695/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7696/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7697/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7698///
7699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ph)
7700#[inline]
7701#[target_feature(enable = "avx512fp16")]
7702#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7703#[rustc_legacy_const_generics(4)]
7704#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7705pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
7706 a: __m512h,
7707 b: __m512h,
7708 c: __m512h,
7709 k: __mmask32,
7710) -> __m512h {
7711 unsafe {
7712 static_assert_rounding!(ROUNDING);
7713 simd_select_bitmask(m:k, yes:_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c), no:c)
7714 }
7715}
7716
7717/// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract
7718/// and add packed elements in c to/from the intermediate result, and store the results in dst using zeromask k
7719/// (the element is zeroed out when the corresponding mask bit is not set).
7720///
7721/// Rounding is done according to the rounding parameter, which can be one of:
7722///
7723/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7724/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7725/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7726/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7727/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7728///
7729/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ph)
7730#[inline]
7731#[target_feature(enable = "avx512fp16")]
7732#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
7733#[rustc_legacy_const_generics(4)]
7734#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7735pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
7736 k: __mmask32,
7737 a: __m512h,
7738 b: __m512h,
7739 c: __m512h,
7740) -> __m512h {
7741 unsafe {
7742 static_assert_rounding!(ROUNDING);
7743 simd_select_bitmask(
7744 m:k,
7745 yes:_mm512_fmsubadd_round_ph::<ROUNDING>(a, b, c),
7746 no:_mm512_setzero_ph(),
7747 )
7748 }
7749}
7750
7751/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
7752/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7753///
7754/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ph)
7755#[inline]
7756#[target_feature(enable = "avx512fp16,avx512vl")]
7757#[cfg_attr(test, assert_instr(vrcpph))]
7758#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7759pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
7760 _mm_mask_rcp_ph(src:_mm_undefined_ph(), k:0xff, a)
7761}
7762
/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrcpph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    // Delegates to the LLVM intrinsic; merge-masking with `src` is done in hardware.
    unsafe { vrcpph_128(a, src, k) }
}
7775
7776/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7777/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7778/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7779///
7780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_ph)
7781#[inline]
7782#[target_feature(enable = "avx512fp16,avx512vl")]
7783#[cfg_attr(test, assert_instr(vrcpph))]
7784#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7785pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
7786 _mm_mask_rcp_ph(src:_mm_setzero_ph(), k, a)
7787}
7788
7789/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
7790/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7791///
7792/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ph)
7793#[inline]
7794#[target_feature(enable = "avx512fp16,avx512vl")]
7795#[cfg_attr(test, assert_instr(vrcpph))]
7796#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7797pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
7798 _mm256_mask_rcp_ph(src:_mm256_undefined_ph(), k:0xffff, a)
7799}
7800
/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rcp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrcpph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    // Delegates to the LLVM intrinsic; merge-masking with `src` is done in hardware.
    unsafe { vrcpph_256(a, src, k) }
}
7813
7814/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7815/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7816/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7817///
7818/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rcp_ph)
7819#[inline]
7820#[target_feature(enable = "avx512fp16,avx512vl")]
7821#[cfg_attr(test, assert_instr(vrcpph))]
7822#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7823pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
7824 _mm256_mask_rcp_ph(src:_mm256_setzero_ph(), k, a)
7825}
7826
7827/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`.
7828/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7829///
7830/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp_ph)
7831#[inline]
7832#[target_feature(enable = "avx512fp16")]
7833#[cfg_attr(test, assert_instr(vrcpph))]
7834#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7835pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
7836 _mm512_mask_rcp_ph(src:_mm512_undefined_ph(), k:0xffffffff, a)
7837}
7838
/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrcpph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    // Delegates to the LLVM intrinsic; merge-masking with `src` is done in hardware.
    unsafe { vrcpph_512(a, src, k) }
}
7851
7852/// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`
7853/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
7854/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7855///
7856/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp_ph)
7857#[inline]
7858#[target_feature(enable = "avx512fp16")]
7859#[cfg_attr(test, assert_instr(vrcpph))]
7860#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7861pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
7862 _mm512_mask_rcp_ph(src:_mm512_setzero_ph(), k, a)
7863}
7864
7865/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7866/// store the result in the lower element of dst, and copy the upper 7 packed elements from a to the
7867/// upper elements of dst.
7868/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7869///
7870/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_sh)
7871#[inline]
7872#[target_feature(enable = "avx512fp16")]
7873#[cfg_attr(test, assert_instr(vrcpsh))]
7874#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7875pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
7876 _mm_mask_rcp_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
7877}
7878
/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
/// store the result in the lower element of dst using writemask k (the element is copied from src when
/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrcpsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Delegates to the LLVM intrinsic; only mask bit 0 is relevant for the scalar lane.
    unsafe { vrcpsh(a, b, src, k) }
}
7892
7893/// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b,
7894/// store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
7895/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
7896/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7897///
7898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp_sh)
7899#[inline]
7900#[target_feature(enable = "avx512fp16")]
7901#[cfg_attr(test, assert_instr(vrcpsh))]
7902#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7903pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
7904 _mm_mask_rcp_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
7905}
7906
7907/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7908/// elements in a, and store the results in dst.
7909/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7910///
7911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ph)
7912#[inline]
7913#[target_feature(enable = "avx512fp16,avx512vl")]
7914#[cfg_attr(test, assert_instr(vrsqrtph))]
7915#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7916pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
7917 _mm_mask_rsqrt_ph(src:_mm_undefined_ph(), k:0xff, a)
7918}
7919
/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set).
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrtph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    // Delegates to the LLVM intrinsic; merge-masking with `src` is done in hardware.
    unsafe { vrsqrtph_128(a, src, k) }
}
7933
7934/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7935/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7936/// corresponding mask bit is not set).
7937/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7938///
7939/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_ph)
7940#[inline]
7941#[target_feature(enable = "avx512fp16,avx512vl")]
7942#[cfg_attr(test, assert_instr(vrsqrtph))]
7943#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7944pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
7945 _mm_mask_rsqrt_ph(src:_mm_setzero_ph(), k, a)
7946}
7947
7948/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7949/// elements in a, and store the results in dst.
7950/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7951///
7952/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ph)
7953#[inline]
7954#[target_feature(enable = "avx512fp16,avx512vl")]
7955#[cfg_attr(test, assert_instr(vrsqrtph))]
7956#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7957pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
7958 _mm256_mask_rsqrt_ph(src:_mm256_undefined_ph(), k:0xffff, a)
7959}
7960
/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set).
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_rsqrt_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrtph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    // The 256-bit `vrsqrtph` intrinsic performs the merge-masking itself,
    // taking the fallback vector `src` and mask `k` as trailing arguments.
    unsafe { vrsqrtph_256(a, src, k) }
}
7974
7975/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7976/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
7977/// corresponding mask bit is not set).
7978/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7979///
7980/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_rsqrt_ph)
7981#[inline]
7982#[target_feature(enable = "avx512fp16,avx512vl")]
7983#[cfg_attr(test, assert_instr(vrsqrtph))]
7984#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7985pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
7986 _mm256_mask_rsqrt_ph(src:_mm256_setzero_ph(), k, a)
7987}
7988
7989/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
7990/// elements in a, and store the results in dst.
7991/// The maximum relative error for this approximation is less than `1.5*2^-12`.
7992///
7993/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt_ph)
7994#[inline]
7995#[target_feature(enable = "avx512fp16")]
7996#[cfg_attr(test, assert_instr(vrsqrtph))]
7997#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
7998pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
7999 _mm512_mask_rsqrt_ph(src:_mm512_undefined_ph(), k:0xffffffff, a)
8000}
8001
/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
/// elements in a, and store the results in dst using writemask k (elements are copied from src when
/// the corresponding mask bit is not set).
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrsqrtph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    // The 512-bit `vrsqrtph` intrinsic performs the merge-masking itself,
    // taking the fallback vector `src` and mask `k` as trailing arguments.
    unsafe { vrsqrtph_512(a, src, k) }
}
8015
8016/// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point
8017/// elements in a, and store the results in dst using zeromask k (elements are zeroed out when the
8018/// corresponding mask bit is not set).
8019/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8020///
8021/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt_ph)
8022#[inline]
8023#[target_feature(enable = "avx512fp16")]
8024#[cfg_attr(test, assert_instr(vrsqrtph))]
8025#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8026pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
8027 _mm512_mask_rsqrt_ph(src:_mm512_setzero_ph(), k, a)
8028}
8029
8030/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
8031/// element in b, store the result in the lower element of dst, and copy the upper 7 packed elements from a
8032/// to the upper elements of dst.
8033/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8034///
8035/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_sh)
8036#[inline]
8037#[target_feature(enable = "avx512fp16")]
8038#[cfg_attr(test, assert_instr(vrsqrtsh))]
8039#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8040pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
8041 _mm_mask_rsqrt_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
8042}
8043
/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
/// element in b, store the result in the lower element of dst using writemask k (the element is copied from src
/// when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
/// The maximum relative error for this approximation is less than `1.5*2^-12`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrsqrtsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Scalar `vrsqrtsh` intrinsic: operates on lane 0 of `b`, passes through
    // the upper lanes of `a`, and merges lane 0 from `src` under mask `k`.
    unsafe { vrsqrtsh(a, b, src, k) }
}
8057
8058/// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point
8059/// element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when
8060/// mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8061/// The maximum relative error for this approximation is less than `1.5*2^-12`.
8062///
8063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt_sh)
8064#[inline]
8065#[target_feature(enable = "avx512fp16")]
8066#[cfg_attr(test, assert_instr(vrsqrtsh))]
8067#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8068pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8069 _mm_mask_rsqrt_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8070}
8071
/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
/// results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
    // Exact sqrt maps directly onto the generic SIMD sqrt; `assert_instr`
    // above checks it lowers to a single `vsqrtph`.
    unsafe { simd_fsqrt(a) }
}
8083
8084/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8085/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8086///
8087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ph)
8088#[inline]
8089#[target_feature(enable = "avx512fp16,avx512vl")]
8090#[cfg_attr(test, assert_instr(vsqrtph))]
8091#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8092pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
8093 unsafe { simd_select_bitmask(m:k, yes:_mm_sqrt_ph(a), no:src) }
8094}
8095
8096/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8097/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8098///
8099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ph)
8100#[inline]
8101#[target_feature(enable = "avx512fp16,avx512vl")]
8102#[cfg_attr(test, assert_instr(vsqrtph))]
8103#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8104pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
8105 unsafe { simd_select_bitmask(m:k, yes:_mm_sqrt_ph(a), no:_mm_setzero_ph()) }
8106}
8107
/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
/// results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
    // Exact sqrt maps directly onto the generic SIMD sqrt; `assert_instr`
    // above checks it lowers to a single `vsqrtph`.
    unsafe { simd_fsqrt(a) }
}
8119
8120/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8121/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8122///
8123/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sqrt_ph)
8124#[inline]
8125#[target_feature(enable = "avx512fp16,avx512vl")]
8126#[cfg_attr(test, assert_instr(vsqrtph))]
8127#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8128pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
8129 unsafe { simd_select_bitmask(m:k, yes:_mm256_sqrt_ph(a), no:src) }
8130}
8131
8132/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8133/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8134///
8135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sqrt_ph)
8136#[inline]
8137#[target_feature(enable = "avx512fp16,avx512vl")]
8138#[cfg_attr(test, assert_instr(vsqrtph))]
8139#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8140pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
8141 unsafe { simd_select_bitmask(m:k, yes:_mm256_sqrt_ph(a), no:_mm256_setzero_ph()) }
8142}
8143
/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
/// results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
    // Exact sqrt maps directly onto the generic SIMD sqrt; `assert_instr`
    // above checks it lowers to a single `vsqrtph`.
    unsafe { simd_fsqrt(a) }
}
8155
8156/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8157/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8158///
8159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ph)
8160#[inline]
8161#[target_feature(enable = "avx512fp16")]
8162#[cfg_attr(test, assert_instr(vsqrtph))]
8163#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8164pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
8165 unsafe { simd_select_bitmask(m:k, yes:_mm512_sqrt_ph(a), no:src) }
8166}
8167
8168/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8169/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8170///
8171/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ph)
8172#[inline]
8173#[target_feature(enable = "avx512fp16")]
8174#[cfg_attr(test, assert_instr(vsqrtph))]
8175#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8176pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
8177 unsafe { simd_select_bitmask(m:k, yes:_mm512_sqrt_ph(a), no:_mm512_setzero_ph()) }
8178}
8179
/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
/// results in dst.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
    unsafe {
        // Reject invalid rounding-mode constants at compile time before
        // handing the immediate to the `vsqrtph` intrinsic.
        static_assert_rounding!(ROUNDING);
        vsqrtph_512(a, ROUNDING)
    }
}
8202
8203/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8204/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8205/// Rounding is done according to the rounding parameter, which can be one of:
8206///
8207/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8208/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8209/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8210/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8212///
8213/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ph)
8214#[inline]
8215#[target_feature(enable = "avx512fp16")]
8216#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8219pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
8220 src: __m512h,
8221 k: __mmask32,
8222 a: __m512h,
8223) -> __m512h {
8224 unsafe {
8225 static_assert_rounding!(ROUNDING);
8226 simd_select_bitmask(m:k, yes:_mm512_sqrt_round_ph::<ROUNDING>(a), no:src)
8227 }
8228}
8229
8230/// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the
8231/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8232/// Rounding is done according to the rounding parameter, which can be one of:
8233///
8234/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8235/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8236/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8237/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8238/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8239///
8240/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ph)
8241#[inline]
8242#[target_feature(enable = "avx512fp16")]
8243#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
8244#[rustc_legacy_const_generics(2)]
8245#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8246pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
8247 unsafe {
8248 static_assert_rounding!(ROUNDING);
8249 simd_select_bitmask(m:k, yes:_mm512_sqrt_round_ph::<ROUNDING>(a), no:_mm512_setzero_ph())
8250 }
8251}
8252
8253/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8254/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8255/// elements of dst.
8256///
8257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sh)
8258#[inline]
8259#[target_feature(enable = "avx512fp16")]
8260#[cfg_attr(test, assert_instr(vsqrtsh))]
8261#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8262pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
8263 _mm_mask_sqrt_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
8264}
8265
/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask
/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Delegates to the rounding variant using the current MXCSR rounding mode.
    _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
8278
8279/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8280/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8281/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8282///
8283/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sh)
8284#[inline]
8285#[target_feature(enable = "avx512fp16")]
8286#[cfg_attr(test, assert_instr(vsqrtsh))]
8287#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8288pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8289 _mm_mask_sqrt_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8290}
8291
8292/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8293/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
8294/// elements of dst.
8295/// Rounding is done according to the rounding parameter, which can be one of:
8296///
8297/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8298/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8299/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8300/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8301/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8302///
8303/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sh)
8304#[inline]
8305#[target_feature(enable = "avx512fp16")]
8306#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8307#[rustc_legacy_const_generics(2)]
8308#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8309pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
8310 static_assert_rounding!(ROUNDING);
8311 _mm_mask_sqrt_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
8312}
8313
/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask
/// bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        // Validate the rounding immediate at compile time before passing it
        // to the scalar `vsqrtsh` intrinsic (argument order: a, b, src, k).
        static_assert_rounding!(ROUNDING);
        vsqrtsh(a, b, src, k, ROUNDING)
    }
}
8342
8343/// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store
8344/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0
8345/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
8346/// Rounding is done according to the rounding parameter, which can be one of:
8347///
8348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8353///
8354/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sh)
8355#[inline]
8356#[target_feature(enable = "avx512fp16")]
8357#[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
8358#[rustc_legacy_const_generics(3)]
8359#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8360pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
8361 k: __mmask8,
8362 a: __m128h,
8363 b: __m128h,
8364) -> __m128h {
8365 static_assert_rounding!(ROUNDING);
8366 _mm_mask_sqrt_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
8367}
8368
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
/// value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
    // Delegates to the 128-bit `vmaxph` intrinsic, which carries the x86
    // max semantics (not IEEE 754) described above.
    unsafe { vmaxph_128(a, b) }
}
8381
8382/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8383/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8384/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8385/// NaN or signed-zero values.
8386///
8387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ph)
8388#[inline]
8389#[target_feature(enable = "avx512fp16,avx512vl")]
8390#[cfg_attr(test, assert_instr(vmaxph))]
8391#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8392pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8393 unsafe { simd_select_bitmask(m:k, yes:_mm_max_ph(a, b), no:src) }
8394}
8395
8396/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8397/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8398/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8399/// NaN or signed-zero values.
8400///
8401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ph)
8402#[inline]
8403#[target_feature(enable = "avx512fp16,avx512vl")]
8404#[cfg_attr(test, assert_instr(vmaxph))]
8405#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8406pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8407 unsafe { simd_select_bitmask(m:k, yes:_mm_max_ph(a, b), no:_mm_setzero_ph()) }
8408}
8409
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
/// value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
    // Delegates to the 256-bit `vmaxph` intrinsic, which carries the x86
    // max semantics (not IEEE 754) described above.
    unsafe { vmaxph_256(a, b) }
}
8422
8423/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8424/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8425/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8426/// NaN or signed-zero values.
8427///
8428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_ph)
8429#[inline]
8430#[target_feature(enable = "avx512fp16,avx512vl")]
8431#[cfg_attr(test, assert_instr(vmaxph))]
8432#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8433pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8434 unsafe { simd_select_bitmask(m:k, yes:_mm256_max_ph(a, b), no:src) }
8435}
8436
8437/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8438/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8439/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8440/// NaN or signed-zero values.
8441///
8442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_ph)
8443#[inline]
8444#[target_feature(enable = "avx512fp16,avx512vl")]
8445#[cfg_attr(test, assert_instr(vmaxph))]
8446#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8447pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8448 unsafe { simd_select_bitmask(m:k, yes:_mm256_max_ph(a, b), no:_mm256_setzero_ph()) }
8449}
8450
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum
/// value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
    // Delegates to the SAE variant with the current-direction (default)
    // exception behavior.
    _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
}
8463
8464/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8465/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8466/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8467/// NaN or signed-zero values.
8468///
8469/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ph)
8470#[inline]
8471#[target_feature(enable = "avx512fp16")]
8472#[cfg_attr(test, assert_instr(vmaxph))]
8473#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8474pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8475 unsafe { simd_select_bitmask(m:k, yes:_mm512_max_ph(a, b), no:src) }
8476}
8477
8478/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8479/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8480/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
8481/// NaN or signed-zero values.
8482///
8483/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ph)
8484#[inline]
8485#[target_feature(enable = "avx512fp16")]
8486#[cfg_attr(test, assert_instr(vmaxph))]
8487#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8488pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8489 unsafe { simd_select_bitmask(m:k, yes:_mm512_max_ph(a, b), no:_mm512_setzero_ph()) }
8490}
8491
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are
/// NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        // Validate the SAE constant at compile time before lowering to the
        // LLVM vmaxph intrinsic.
        static_assert_sae!(SAE);
        vmaxph_512(a, b, SAE)
    }
}
8509
8510/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8511/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8512/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8513/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8514///
8515/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ph)
8516#[inline]
8517#[target_feature(enable = "avx512fp16")]
8518#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8519#[rustc_legacy_const_generics(4)]
8520#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8521pub fn _mm512_mask_max_round_ph<const SAE: i32>(
8522 src: __m512h,
8523 k: __mmask32,
8524 a: __m512h,
8525 b: __m512h,
8526) -> __m512h {
8527 unsafe {
8528 static_assert_sae!(SAE);
8529 simd_select_bitmask(m:k, yes:_mm512_max_round_ph::<SAE>(a, b), no:src)
8530 }
8531}
8532
8533/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum
8534/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8535/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8536/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8537///
8538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ph)
8539#[inline]
8540#[target_feature(enable = "avx512fp16")]
8541#[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
8542#[rustc_legacy_const_generics(3)]
8543#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8544pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8545 unsafe {
8546 static_assert_sae!(SAE);
8547 simd_select_bitmask(m:k, yes:_mm512_max_round_ph::<SAE>(a, b), no:_mm512_setzero_ph())
8548 }
8549}
8550
8551/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
8552/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8553/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value
8554/// when inputs are NaN or signed-zero values.
8555///
8556/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sh)
8557#[inline]
8558#[target_feature(enable = "avx512fp16,avx512vl")]
8559#[cfg_attr(test, assert_instr(vmaxsh))]
8560#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8561pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
8562 _mm_mask_max_sh(src:_mm_undefined_ph(), k:0xff, a, b)
8563}
8564
/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum
/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Delegate to the rounding variant with the current rounding mode.
    _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
8578
8579/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8580/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8581/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8582/// for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8583///
8584/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sh)
8585#[inline]
8586#[target_feature(enable = "avx512fp16,avx512vl")]
8587#[cfg_attr(test, assert_instr(vmaxsh))]
8588#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8589pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8590 _mm_mask_max_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8591}
8592
8593/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8594/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8595/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8596/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8597///
8598/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sh)
8599#[inline]
8600#[target_feature(enable = "avx512fp16,avx512vl")]
8601#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8602#[rustc_legacy_const_generics(2)]
8603#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8604pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8605 static_assert_sae!(SAE);
8606 _mm_mask_max_round_sh::<SAE>(src:_mm_undefined_ph(), k:0xff, a, b)
8607}
8608
/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_max_round_sh<const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        // Validate SAE at compile time, then lower directly to the masked
        // scalar LLVM intrinsic (see the contract in the doc comment above).
        static_assert_sae!(SAE);
        vmaxsh(a, b, src, k, SAE)
    }
}
8632
8633/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value
8634/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8635/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8636/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8637/// (IEEE 754) maximum value when inputs are NaN or signed-zero values.
8638///
8639/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sh)
8640#[inline]
8641#[target_feature(enable = "avx512fp16,avx512vl")]
8642#[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
8643#[rustc_legacy_const_generics(3)]
8644#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8645pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8646 static_assert_sae!(SAE);
8647 _mm_mask_max_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
8648}
8649
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
    // Lower directly to the 128-bit LLVM vminph intrinsic.
    unsafe { vminph_128(a, b) }
}
8662
8663/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8664/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8665/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8666/// NaN or signed-zero values.
8667///
8668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ph)
8669#[inline]
8670#[target_feature(enable = "avx512fp16,avx512vl")]
8671#[cfg_attr(test, assert_instr(vminph))]
8672#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8673pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8674 unsafe { simd_select_bitmask(m:k, yes:_mm_min_ph(a, b), no:src) }
8675}
8676
8677/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8678/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8679/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8680/// NaN or signed-zero values.
8681///
8682/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ph)
8683#[inline]
8684#[target_feature(enable = "avx512fp16,avx512vl")]
8685#[cfg_attr(test, assert_instr(vminph))]
8686#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8687pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8688 unsafe { simd_select_bitmask(m:k, yes:_mm_min_ph(a, b), no:_mm_setzero_ph()) }
8689}
8690
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
    // Lower directly to the 256-bit LLVM vminph intrinsic.
    unsafe { vminph_256(a, b) }
}
8703
8704/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8705/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8706/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8707/// NaN or signed-zero values.
8708///
8709/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_ph)
8710#[inline]
8711#[target_feature(enable = "avx512fp16,avx512vl")]
8712#[cfg_attr(test, assert_instr(vminph))]
8713#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8714pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8715 unsafe { simd_select_bitmask(m:k, yes:_mm256_min_ph(a, b), no:src) }
8716}
8717
8718/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8719/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8720/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8721/// NaN or signed-zero values.
8722///
8723/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_ph)
8724#[inline]
8725#[target_feature(enable = "avx512fp16,avx512vl")]
8726#[cfg_attr(test, assert_instr(vminph))]
8727#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8728pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
8729 unsafe { simd_select_bitmask(m:k, yes:_mm256_min_ph(a, b), no:_mm256_setzero_ph()) }
8730}
8731
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value
/// when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
    // Delegate to the rounding variant with the current rounding mode
    // (no SAE override, exceptions not suppressed).
    _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
}
8744
8745/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8746/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8747/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8748/// NaN or signed-zero values.
8749///
8750/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ph)
8751#[inline]
8752#[target_feature(enable = "avx512fp16")]
8753#[cfg_attr(test, assert_instr(vminph))]
8754#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8755pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8756 unsafe { simd_select_bitmask(m:k, yes:_mm512_min_ph(a, b), no:src) }
8757}
8758
8759/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8760/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8761/// Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are
8762/// NaN or signed-zero values.
8763///
8764/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ph)
8765#[inline]
8766#[target_feature(enable = "avx512fp16")]
8767#[cfg_attr(test, assert_instr(vminph))]
8768#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8769pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8770 unsafe { simd_select_bitmask(m:k, yes:_mm512_min_ph(a, b), no:_mm512_setzero_ph()) }
8771}
8772
/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
/// values in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not
/// follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
    unsafe {
        // Validate the SAE constant at compile time before lowering to the
        // LLVM vminph intrinsic.
        static_assert_sae!(SAE);
        vminph_512(a, b, SAE)
    }
}
8789
8790/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8791/// values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8792/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8793/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8794///
8795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ph)
8796#[inline]
8797#[target_feature(enable = "avx512fp16")]
8798#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8799#[rustc_legacy_const_generics(4)]
8800#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8801pub fn _mm512_mask_min_round_ph<const SAE: i32>(
8802 src: __m512h,
8803 k: __mmask32,
8804 a: __m512h,
8805 b: __m512h,
8806) -> __m512h {
8807 unsafe {
8808 static_assert_sae!(SAE);
8809 simd_select_bitmask(m:k, yes:_mm512_min_round_ph::<SAE>(a, b), no:src)
8810 }
8811}
8812
8813/// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum
8814/// values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8815/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8816/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8817///
8818/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ph)
8819#[inline]
8820#[target_feature(enable = "avx512fp16")]
8821#[cfg_attr(test, assert_instr(vminph, SAE = 8))]
8822#[rustc_legacy_const_generics(3)]
8823#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8824pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
8825 unsafe {
8826 static_assert_sae!(SAE);
8827 simd_select_bitmask(m:k, yes:_mm512_min_round_ph::<SAE>(a, b), no:_mm512_setzero_ph())
8828 }
8829}
8830
8831/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
8832/// value in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
8833/// of dst. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when
8834/// inputs are NaN or signed-zero values.
8835///
8836/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sh)
8837#[inline]
8838#[target_feature(enable = "avx512fp16,avx512vl")]
8839#[cfg_attr(test, assert_instr(vminsh))]
8840#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8841pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
8842 _mm_mask_min_sh(src:_mm_undefined_ph(), k:0xff, a, b)
8843}
8844
/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum
/// value in the lower element of dst using writemask k (the element is copied from src when mask bit 0
/// is not set), and copy the upper 7 packed elements from a to the upper elements of dst. Does not follow
/// the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Delegate to the rounding variant with the current rounding mode.
    _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
8858
8859/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8860/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8861/// copy the upper 7 packed elements from a to the upper elements of dst. Does not follow the IEEE Standard
8862/// for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8863///
8864/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sh)
8865#[inline]
8866#[target_feature(enable = "avx512fp16,avx512vl")]
8867#[cfg_attr(test, assert_instr(vminsh))]
8868#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8869pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8870 _mm_mask_min_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
8871}
8872
8873/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8874/// in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements of dst.
8875/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the
8876/// IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8877///
8878/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sh)
8879#[inline]
8880#[target_feature(enable = "avx512fp16,avx512vl")]
8881#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8882#[rustc_legacy_const_generics(2)]
8883#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8884pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
8885 static_assert_sae!(SAE);
8886 _mm_mask_min_round_sh::<SAE>(src:_mm_undefined_ph(), k:0xff, a, b)
8887}
8888
/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
/// in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_min_round_sh<const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        // Validate SAE at compile time, then lower directly to the masked
        // scalar LLVM intrinsic (see the contract in the doc comment above).
        static_assert_sae!(SAE);
        vminsh(a, b, src, k, SAE)
    }
}
8912
8913/// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value
8914/// in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and
8915/// copy the upper 7 packed elements from a to the upper elements of dst. Exceptions can be suppressed by
8916/// passing _MM_FROUND_NO_EXC in the sae parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic
8917/// (IEEE 754) minimum value when inputs are NaN or signed-zero values.
8918///
8919/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sh)
8920#[inline]
8921#[target_feature(enable = "avx512fp16,avx512vl")]
8922#[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
8923#[rustc_legacy_const_generics(3)]
8924#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8925pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
8926 static_assert_sae!(SAE);
8927 _mm_mask_min_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
8928}
8929
8930/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8931/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8932/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8933///
8934/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ph)
8935#[inline]
8936#[target_feature(enable = "avx512fp16,avx512vl")]
8937#[cfg_attr(test, assert_instr(vgetexpph))]
8938#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8939pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
8940 _mm_mask_getexp_ph(src:_mm_undefined_ph(), k:0xff, a)
8941}
8942
/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    // Lower directly to the masked 128-bit LLVM vgetexpph intrinsic.
    unsafe { vgetexpph_128(a, src, k) }
}
8956
8957/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8958/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
8959/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
8960/// `floor(log2(x))` for each element.
8961///
8962/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ph)
8963#[inline]
8964#[target_feature(enable = "avx512fp16,avx512vl")]
8965#[cfg_attr(test, assert_instr(vgetexpph))]
8966#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8967pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
8968 _mm_mask_getexp_ph(src:_mm_setzero_ph(), k, a)
8969}
8970
8971/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8972/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
8973/// This intrinsic essentially calculates `floor(log2(x))` for each element.
8974///
8975/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getexp_ph)
8976#[inline]
8977#[target_feature(enable = "avx512fp16,avx512vl")]
8978#[cfg_attr(test, assert_instr(vgetexpph))]
8979#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
8980pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
8981 _mm256_mask_getexp_ph(src:_mm256_undefined_ph(), k:0xffff, a)
8982}
8983
/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
    // Lower directly to the masked 256-bit LLVM vgetexpph intrinsic.
    unsafe { vgetexpph_256(a, src, k) }
}
8997
8998/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
8999/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
9000/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
9001/// `floor(log2(x))` for each element.
9002///
9003/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getexp_ph)
9004#[inline]
9005#[target_feature(enable = "avx512fp16,avx512vl")]
9006#[cfg_attr(test, assert_instr(vgetexpph))]
9007#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9008pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
9009 _mm256_mask_getexp_ph(src:_mm256_setzero_ph(), k, a)
9010}
9011
9012/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9013/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
9014/// This intrinsic essentially calculates `floor(log2(x))` for each element.
9015///
9016/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ph)
9017#[inline]
9018#[target_feature(enable = "avx512fp16")]
9019#[cfg_attr(test, assert_instr(vgetexpph))]
9020#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9021pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
9022 _mm512_mask_getexp_ph(src:_mm512_undefined_ph(), k:0xffffffff, a)
9023}
9024
/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
/// `floor(log2(x))` for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    // Delegate to the rounding variant with the current rounding mode.
    _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
}
9038
9039/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9040/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
9041/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
9042/// `floor(log2(x))` for each element.
9043///
9044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ph)
9045#[inline]
9046#[target_feature(enable = "avx512fp16")]
9047#[cfg_attr(test, assert_instr(vgetexpph))]
9048#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9049pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
9050 _mm512_mask_getexp_ph(src:_mm512_setzero_ph(), k, a)
9051}
9052
9053/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9054/// (16-bit) floating-point number representing the integer exponent, and store the results in dst.
9055/// This intrinsic essentially calculates `floor(log2(x))` for each element. Exceptions can be suppressed
9056/// by passing _MM_FROUND_NO_EXC in the sae parameter
9057///
9058/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ph)
9059#[inline]
9060#[target_feature(enable = "avx512fp16")]
9061#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
9062#[rustc_legacy_const_generics(1)]
9063#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9064pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
9065 static_assert_sae!(SAE);
9066 _mm512_mask_getexp_round_ph::<SAE>(src:_mm512_undefined_ph(), k:0xffffffff, a)
9067}
9068
9069/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9070/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k
9071/// (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates
9072/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9073///
9074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    unsafe {
        // Reject any SAE value other than the allowed rounding/exception controls
        // at compile time, then forward directly to the LLVM intrinsic.
        static_assert_sae!(SAE);
        vgetexpph_512(a, src, k, SAE)
    }
}
9090
9091/// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision
9092/// (16-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask
9093/// k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates
9094/// `floor(log2(x))` for each element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9095///
9096/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ph)
9097#[inline]
9098#[target_feature(enable = "avx512fp16")]
9099#[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
9100#[rustc_legacy_const_generics(2)]
9101#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9102pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
9103 static_assert_sae!(SAE);
9104 _mm512_mask_getexp_round_ph::<SAE>(src:_mm512_setzero_ph(), k, a)
9105}
9106
9107/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9108/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9109/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9110/// calculates `floor(log2(x))` for the lower element.
9111///
9112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sh)
9113#[inline]
9114#[target_feature(enable = "avx512fp16")]
9115#[cfg_attr(test, assert_instr(vgetexpsh))]
9116#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9117pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
9118 _mm_mask_getexp_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
9119}
9120
9121/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9122/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9123/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9124/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9125/// for the lower element.
9126///
9127/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Delegate to the SAE-parameterized variant, using the current rounding mode.
    _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
9135
9136/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9137/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9138/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9139/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9140/// lower element.
9141///
9142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sh)
9143#[inline]
9144#[target_feature(enable = "avx512fp16")]
9145#[cfg_attr(test, assert_instr(vgetexpsh))]
9146#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9147pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9148 _mm_mask_getexp_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
9149}
9150
9151/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9152/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9153/// of dst, and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially
9154/// calculates `floor(log2(x))` for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9155/// in the sae parameter
9156///
9157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sh)
9158#[inline]
9159#[target_feature(enable = "avx512fp16")]
9160#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9161#[rustc_legacy_const_generics(2)]
9162#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9163pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
9164 static_assert_sae!(SAE);
9165 _mm_mask_getexp_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
9166}
9167
9168/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9169/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9170/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 7
9171/// packed elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))`
9172/// for the lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9173///
9174/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        // Validate SAE at compile time, then forward directly to the LLVM intrinsic.
        static_assert_sae!(SAE);
        vgetexpsh(a, b, src, k, SAE)
    }
}
9191
9192/// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision
9193/// (16-bit) floating-point number representing the integer exponent, store the result in the lower element
9194/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed
9195/// elements from a to the upper elements of dst. This intrinsic essentially calculates `floor(log2(x))` for the
9196/// lower element. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9197///
9198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sh)
9199#[inline]
9200#[target_feature(enable = "avx512fp16")]
9201#[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
9202#[rustc_legacy_const_generics(3)]
9203#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9204pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
9205 static_assert_sae!(SAE);
9206 _mm_mask_getexp_round_sh::<SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
9207}
9208
9209/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9210/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9211/// on the interval range defined by norm and the sign depends on sign and the source sign.
9212///
9213/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9214///
9215/// _MM_MANT_NORM_1_2 // interval [1, 2)
9216/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9217/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9218/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9219///
9220/// The sign is determined by sc which can take the following values:
9221///
9222/// _MM_MANT_SIGN_src // sign = sign(src)
9223/// _MM_MANT_SIGN_zero // sign = 0
9224/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9225///
9226/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ph)
9227#[inline]
9228#[target_feature(enable = "avx512fp16,avx512vl")]
9229#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9230#[rustc_legacy_const_generics(1, 2)]
9231#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9232pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9233 a: __m128h,
9234) -> __m128h {
9235 static_assert_uimm_bits!(NORM, 4);
9236 static_assert_uimm_bits!(SIGN, 2);
9237 _mm_mask_getmant_ph::<NORM, SIGN>(src:_mm_undefined_ph(), k:0xff, a)
9238}
9239
9240/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9241/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9242/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9243/// by norm and the sign depends on sign and the source sign.
9244///
9245/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9246///
9247/// _MM_MANT_NORM_1_2 // interval [1, 2)
9248/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9249/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9250/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9251///
9252/// The sign is determined by sc which can take the following values:
9253///
9254/// _MM_MANT_SIGN_src // sign = sign(src)
9255/// _MM_MANT_SIGN_zero // sign = 0
9256/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9257///
9258/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getmant_ph<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
) -> __m128h {
    unsafe {
        // Validate the immediates at compile time, then pack them into the
        // instruction's single imm8: sign control in bits 3:2, norm in bits 1:0.
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        vgetmantph_128(a, (SIGN << 2) | NORM, src, k)
    }
}
9278
9279/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9280/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9281/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9282/// by norm and the sign depends on sign and the source sign.
9283///
9284/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9285///
9286/// _MM_MANT_NORM_1_2 // interval [1, 2)
9287/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9288/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9289/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9290///
9291/// The sign is determined by sc which can take the following values:
9292///
9293/// _MM_MANT_SIGN_src // sign = sign(src)
9294/// _MM_MANT_SIGN_zero // sign = 0
9295/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9296///
9297/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ph)
9298#[inline]
9299#[target_feature(enable = "avx512fp16,avx512vl")]
9300#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9301#[rustc_legacy_const_generics(2, 3)]
9302#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9303pub fn _mm_maskz_getmant_ph<
9304 const NORM: _MM_MANTISSA_NORM_ENUM,
9305 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9306>(
9307 k: __mmask8,
9308 a: __m128h,
9309) -> __m128h {
9310 static_assert_uimm_bits!(NORM, 4);
9311 static_assert_uimm_bits!(SIGN, 2);
9312 _mm_mask_getmant_ph::<NORM, SIGN>(src:_mm_setzero_ph(), k, a)
9313}
9314
9315/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9316/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9317/// on the interval range defined by norm and the sign depends on sign and the source sign.
9318///
9319/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9320///
9321/// _MM_MANT_NORM_1_2 // interval [1, 2)
9322/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9323/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9324/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9325///
9326/// The sign is determined by sc which can take the following values:
9327///
9328/// _MM_MANT_SIGN_src // sign = sign(src)
9329/// _MM_MANT_SIGN_zero // sign = 0
9330/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9331///
9332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_getmant_ph)
9333#[inline]
9334#[target_feature(enable = "avx512fp16,avx512vl")]
9335#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9336#[rustc_legacy_const_generics(1, 2)]
9337#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9338pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9339 a: __m256h,
9340) -> __m256h {
9341 static_assert_uimm_bits!(NORM, 4);
9342 static_assert_uimm_bits!(SIGN, 2);
9343 _mm256_mask_getmant_ph::<NORM, SIGN>(src:_mm256_undefined_ph(), k:0xffff, a)
9344}
9345
9346/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9347/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9348/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9349/// by norm and the sign depends on sign and the source sign.
9350///
9351/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9352///
9353/// _MM_MANT_NORM_1_2 // interval [1, 2)
9354/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9355/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9356/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9357///
9358/// The sign is determined by sc which can take the following values:
9359///
9360/// _MM_MANT_SIGN_src // sign = sign(src)
9361/// _MM_MANT_SIGN_zero // sign = 0
9362/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9363///
9364/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_getmant_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_getmant_ph<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m256h,
    k: __mmask16,
    a: __m256h,
) -> __m256h {
    unsafe {
        // Validate the immediates at compile time, then pack them into the
        // instruction's single imm8: sign control in bits 3:2, norm in bits 1:0.
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        vgetmantph_256(a, (SIGN << 2) | NORM, src, k)
    }
}
9384
9385/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9386/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9387/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9388/// by norm and the sign depends on sign and the source sign.
9389///
9390/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9391///
9392/// _MM_MANT_NORM_1_2 // interval [1, 2)
9393/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9394/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9395/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9396///
9397/// The sign is determined by sc which can take the following values:
9398///
9399/// _MM_MANT_SIGN_src // sign = sign(src)
9400/// _MM_MANT_SIGN_zero // sign = 0
9401/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9402///
9403/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_getmant_ph)
9404#[inline]
9405#[target_feature(enable = "avx512fp16,avx512vl")]
9406#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9407#[rustc_legacy_const_generics(2, 3)]
9408#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9409pub fn _mm256_maskz_getmant_ph<
9410 const NORM: _MM_MANTISSA_NORM_ENUM,
9411 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9412>(
9413 k: __mmask16,
9414 a: __m256h,
9415) -> __m256h {
9416 static_assert_uimm_bits!(NORM, 4);
9417 static_assert_uimm_bits!(SIGN, 2);
9418 _mm256_mask_getmant_ph::<NORM, SIGN>(src:_mm256_setzero_ph(), k, a)
9419}
9420
9421/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9422/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9423/// on the interval range defined by norm and the sign depends on sign and the source sign.
9424///
9425/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9426///
9427/// _MM_MANT_NORM_1_2 // interval [1, 2)
9428/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9429/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9430/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9431///
9432/// The sign is determined by sc which can take the following values:
9433///
9434/// _MM_MANT_SIGN_src // sign = sign(src)
9435/// _MM_MANT_SIGN_zero // sign = 0
9436/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9437///
9438/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ph)
9439#[inline]
9440#[target_feature(enable = "avx512fp16")]
9441#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9442#[rustc_legacy_const_generics(1, 2)]
9443#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9444pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9445 a: __m512h,
9446) -> __m512h {
9447 static_assert_uimm_bits!(NORM, 4);
9448 static_assert_uimm_bits!(SIGN, 2);
9449 _mm512_mask_getmant_ph::<NORM, SIGN>(src:_mm512_undefined_ph(), k:0xffffffff, a)
9450}
9451
9452/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9453/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9454/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9455/// by norm and the sign depends on sign and the source sign.
9456///
9457/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9458///
9459/// _MM_MANT_NORM_1_2 // interval [1, 2)
9460/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9461/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9462/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9463///
9464/// The sign is determined by sc which can take the following values:
9465///
9466/// _MM_MANT_SIGN_src // sign = sign(src)
9467/// _MM_MANT_SIGN_zero // sign = 0
9468/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9469///
9470/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_getmant_ph<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    // Validate the immediates here, then delegate to the SAE-parameterized
    // variant using the current rounding mode.
    static_assert_uimm_bits!(NORM, 4);
    static_assert_uimm_bits!(SIGN, 2);
    _mm512_mask_getmant_round_ph::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a)
}
9488
9489/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9490/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9491/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9492/// by norm and the sign depends on sign and the source sign.
9493///
9494/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9495///
9496/// _MM_MANT_NORM_1_2 // interval [1, 2)
9497/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9498/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9499/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9500///
9501/// The sign is determined by sc which can take the following values:
9502///
9503/// _MM_MANT_SIGN_src // sign = sign(src)
9504/// _MM_MANT_SIGN_zero // sign = 0
9505/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9506///
9507/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ph)
9508#[inline]
9509#[target_feature(enable = "avx512fp16")]
9510#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
9511#[rustc_legacy_const_generics(2, 3)]
9512#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9513pub fn _mm512_maskz_getmant_ph<
9514 const NORM: _MM_MANTISSA_NORM_ENUM,
9515 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9516>(
9517 k: __mmask32,
9518 a: __m512h,
9519) -> __m512h {
9520 static_assert_uimm_bits!(NORM, 4);
9521 static_assert_uimm_bits!(SIGN, 2);
9522 _mm512_mask_getmant_ph::<NORM, SIGN>(src:_mm512_setzero_ph(), k, a)
9523}
9524
9525/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9526/// the results in dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9527/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9528/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9529///
9530/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9531///
9532/// _MM_MANT_NORM_1_2 // interval [1, 2)
9533/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9534/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9535/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9536///
9537/// The sign is determined by sc which can take the following values:
9538///
9539/// _MM_MANT_SIGN_src // sign = sign(src)
9540/// _MM_MANT_SIGN_zero // sign = 0
9541/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9542///
9543/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9544///
9545/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ph)
9546#[inline]
9547#[target_feature(enable = "avx512fp16")]
9548#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9549#[rustc_legacy_const_generics(1, 2, 3)]
9550#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9551pub fn _mm512_getmant_round_ph<
9552 const NORM: _MM_MANTISSA_NORM_ENUM,
9553 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9554 const SAE: i32,
9555>(
9556 a: __m512h,
9557) -> __m512h {
9558 static_assert_uimm_bits!(NORM, 4);
9559 static_assert_uimm_bits!(SIGN, 2);
9560 static_assert_sae!(SAE);
9561 _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(src:_mm512_undefined_ph(), k:0xffffffff, a)
9562}
9563
9564/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9565/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9566/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9567/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9568/// in the sae parameter
9569///
9570/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9571///
9572/// _MM_MANT_NORM_1_2 // interval [1, 2)
9573/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9574/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9575/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9576///
9577/// The sign is determined by sc which can take the following values:
9578///
9579/// _MM_MANT_SIGN_src // sign = sign(src)
9580/// _MM_MANT_SIGN_zero // sign = 0
9581/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9582///
9583/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9584///
9585/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_getmant_round_ph<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    unsafe {
        // Validate all immediates at compile time, then pack sign control into
        // bits 3:2 and the norm interval into bits 1:0 of the instruction's imm8
        // and forward to the LLVM intrinsic together with the SAE control.
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_sae!(SAE);
        vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE)
    }
}
9607
9608/// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store
9609/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9610/// This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends on the interval range defined
9611/// by norm and the sign depends on sign and the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
9612/// in the sae parameter
9613///
9614/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9615///
9616/// _MM_MANT_NORM_1_2 // interval [1, 2)
9617/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9618/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9619/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9620///
9621/// The sign is determined by sc which can take the following values:
9622///
9623/// _MM_MANT_SIGN_src // sign = sign(src)
9624/// _MM_MANT_SIGN_zero // sign = 0
9625/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9626///
9627/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9628///
9629/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ph)
9630#[inline]
9631#[target_feature(enable = "avx512fp16")]
9632#[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
9633#[rustc_legacy_const_generics(2, 3, 4)]
9634#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9635pub fn _mm512_maskz_getmant_round_ph<
9636 const NORM: _MM_MANTISSA_NORM_ENUM,
9637 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9638 const SAE: i32,
9639>(
9640 k: __mmask32,
9641 a: __m512h,
9642) -> __m512h {
9643 static_assert_uimm_bits!(NORM, 4);
9644 static_assert_uimm_bits!(SIGN, 2);
9645 static_assert_sae!(SAE);
9646 _mm512_mask_getmant_round_ph::<NORM, SIGN, SAE>(src:_mm512_setzero_ph(), k, a)
9647}
9648
9649/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9650/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9651/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9652/// on the interval range defined by norm and the sign depends on sign and the source sign.
9653///
9654/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9655///
9656/// _MM_MANT_NORM_1_2 // interval [1, 2)
9657/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9658/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9659/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9660///
9661/// The sign is determined by sc which can take the following values:
9662///
9663/// _MM_MANT_SIGN_src // sign = sign(src)
9664/// _MM_MANT_SIGN_zero // sign = 0
9665/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9666///
9667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sh)
9668#[inline]
9669#[target_feature(enable = "avx512fp16")]
9670#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9671#[rustc_legacy_const_generics(2, 3)]
9672#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9673pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
9674 a: __m128h,
9675 b: __m128h,
9676) -> __m128h {
9677 static_assert_uimm_bits!(NORM, 4);
9678 static_assert_uimm_bits!(SIGN, 2);
9679 _mm_mask_getmant_sh::<NORM, SIGN>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
9680}
9681
9682/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9683/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9684/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9685/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9686/// the source sign.
9687///
9688/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9689///
9690/// _MM_MANT_NORM_1_2 // interval [1, 2)
9691/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9692/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9693/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9694///
9695/// The sign is determined by sc which can take the following values:
9696///
9697/// _MM_MANT_SIGN_src // sign = sign(src)
9698/// _MM_MANT_SIGN_zero // sign = 0
9699/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9700///
9701/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sh)
9702#[inline]
9703#[target_feature(enable = "avx512fp16")]
9704#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9705#[rustc_legacy_const_generics(4, 5)]
9706#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9707pub fn _mm_mask_getmant_sh<
9708 const NORM: _MM_MANTISSA_NORM_ENUM,
9709 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9710>(
9711 src: __m128h,
9712 k: __mmask8,
9713 a: __m128h,
9714 b: __m128h,
9715) -> __m128h {
9716 static_assert_uimm_bits!(NORM, 4);
9717 static_assert_uimm_bits!(SIGN, 2);
9718 _mm_mask_getmant_round_sh::<NORM, SIGN, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
9719}
9720
9721/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9722/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9723/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9724/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9725/// the source sign.
9726///
9727/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9728///
9729/// _MM_MANT_NORM_1_2 // interval [1, 2)
9730/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9731/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9732/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9733///
9734/// The sign is determined by sc which can take the following values:
9735///
9736/// _MM_MANT_SIGN_src // sign = sign(src)
9737/// _MM_MANT_SIGN_zero // sign = 0
9738/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9739///
9740/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sh)
9741#[inline]
9742#[target_feature(enable = "avx512fp16")]
9743#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
9744#[rustc_legacy_const_generics(3, 4)]
9745#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9746pub fn _mm_maskz_getmant_sh<
9747 const NORM: _MM_MANTISSA_NORM_ENUM,
9748 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9749>(
9750 k: __mmask8,
9751 a: __m128h,
9752 b: __m128h,
9753) -> __m128h {
9754 static_assert_uimm_bits!(NORM, 4);
9755 static_assert_uimm_bits!(SIGN, 2);
9756 _mm_mask_getmant_sh::<NORM, SIGN>(src:f16x8::ZERO.as_m128h(), k, a, b)
9757}
9758
9759/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9760/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
9761/// elements of dst. This intrinsic essentially calculates `±(2^k)*|x.significand|`, where k depends
9762/// on the interval range defined by norm and the sign depends on sign and the source sign. Exceptions can
9763/// be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9764///
9765/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9766///
9767/// _MM_MANT_NORM_1_2 // interval [1, 2)
9768/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9769/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9770/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9771///
9772/// The sign is determined by sc which can take the following values:
9773///
9774/// _MM_MANT_SIGN_src // sign = sign(src)
9775/// _MM_MANT_SIGN_zero // sign = 0
9776/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9777///
9778/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9779///
9780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sh)
9781#[inline]
9782#[target_feature(enable = "avx512fp16")]
9783#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
9784#[rustc_legacy_const_generics(2, 3, 4)]
9785#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9786pub fn _mm_getmant_round_sh<
9787 const NORM: _MM_MANTISSA_NORM_ENUM,
9788 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9789 const SAE: i32,
9790>(
9791 a: __m128h,
9792 b: __m128h,
9793) -> __m128h {
9794 static_assert_uimm_bits!(NORM, 4);
9795 static_assert_uimm_bits!(SIGN, 2);
9796 static_assert_sae!(SAE);
9797 _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
9798}
9799
9800/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9801/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
9802/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9803/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9804/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9805///
9806/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9807///
9808/// _MM_MANT_NORM_1_2 // interval [1, 2)
9809/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9810/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9811/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9812///
9813/// The sign is determined by sc which can take the following values:
9814///
9815/// _MM_MANT_SIGN_src // sign = sign(src)
9816/// _MM_MANT_SIGN_zero // sign = 0
9817/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9818///
9819/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9820///
9821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5, 6)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_getmant_round_sh<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_sae!(SAE);
        // Pack the immediate the way the instruction expects: sign control
        // in bits 3:2, normalization interval in bits 1:0.
        vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE)
    }
}
9844
9845/// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store
9846/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
9847/// and copy the upper 7 packed elements from a to the upper elements of dst. This intrinsic essentially calculates
9848/// `±(2^k)*|x.significand|`, where k depends on the interval range defined by norm and the sign depends on sign and
9849/// the source sign. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9850///
9851/// The mantissa is normalized to the interval specified by interv, which can take the following values:
9852///
9853/// _MM_MANT_NORM_1_2 // interval [1, 2)
9854/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
9855/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
9856/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
9857///
9858/// The sign is determined by sc which can take the following values:
9859///
9860/// _MM_MANT_SIGN_src // sign = sign(src)
9861/// _MM_MANT_SIGN_zero // sign = 0
9862/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
9863///
9864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
9865///
9866/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sh)
9867#[inline]
9868#[target_feature(enable = "avx512fp16")]
9869#[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
9870#[rustc_legacy_const_generics(3, 4, 5)]
9871#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9872pub fn _mm_maskz_getmant_round_sh<
9873 const NORM: _MM_MANTISSA_NORM_ENUM,
9874 const SIGN: _MM_MANTISSA_SIGN_ENUM,
9875 const SAE: i32,
9876>(
9877 k: __mmask8,
9878 a: __m128h,
9879 b: __m128h,
9880) -> __m128h {
9881 static_assert_uimm_bits!(NORM, 4);
9882 static_assert_uimm_bits!(SIGN, 2);
9883 static_assert_sae!(SAE);
9884 _mm_mask_getmant_round_sh::<NORM, SIGN, SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
9885}
9886
9887/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9888/// specified by imm8, and store the results in dst.
9889///
9890/// Rounding is done according to the imm8 parameter, which can be one of:
9891///
9892/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9893/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9894/// * [`_MM_FROUND_TO_POS_INF`] : round up
9895/// * [`_MM_FROUND_TO_ZERO`] : truncate
9896/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9897///
9898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ph)
9899#[inline]
9900#[target_feature(enable = "avx512fp16,avx512vl")]
9901#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9902#[rustc_legacy_const_generics(1)]
9903#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9904pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
9905 static_assert_uimm_bits!(IMM8, 8);
9906 _mm_mask_roundscale_ph::<IMM8>(src:_mm_undefined_ph(), k:0xff, a)
9907}
9908
9909/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9910/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9911/// the corresponding mask bit is not set).
9912///
9913/// Rounding is done according to the imm8 parameter, which can be one of:
9914///
9915/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9916/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9917/// * [`_MM_FROUND_TO_POS_INF`] : round up
9918/// * [`_MM_FROUND_TO_ZERO`] : truncate
9919/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9920///
9921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Merge-masking intrinsic: `src` and `k` supply the values for lanes
        // whose mask bit is clear.
        vrndscaleph_128(a, IMM8, src, k)
    }
}
9933
9934/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9935/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
9936/// mask bit is not set).
9937///
9938/// Rounding is done according to the imm8 parameter, which can be one of:
9939///
9940/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9941/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9942/// * [`_MM_FROUND_TO_POS_INF`] : round up
9943/// * [`_MM_FROUND_TO_ZERO`] : truncate
9944/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9945///
9946/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ph)
9947#[inline]
9948#[target_feature(enable = "avx512fp16,avx512vl")]
9949#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9950#[rustc_legacy_const_generics(2)]
9951#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9952pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
9953 static_assert_uimm_bits!(IMM8, 8);
9954 _mm_mask_roundscale_ph::<IMM8>(src:_mm_setzero_ph(), k, a)
9955}
9956
9957/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9958/// specified by imm8, and store the results in dst.
9959///
9960/// Rounding is done according to the imm8 parameter, which can be one of:
9961///
9962/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9963/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9964/// * [`_MM_FROUND_TO_POS_INF`] : round up
9965/// * [`_MM_FROUND_TO_ZERO`] : truncate
9966/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9967///
9968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ph)
9969#[inline]
9970#[target_feature(enable = "avx512fp16,avx512vl")]
9971#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
9972#[rustc_legacy_const_generics(1)]
9973#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
9974pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
9975 static_assert_uimm_bits!(IMM8, 8);
9976 _mm256_mask_roundscale_ph::<IMM8>(src:_mm256_undefined_ph(), k:0xffff, a)
9977}
9978
9979/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
9980/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
9981/// the corresponding mask bit is not set).
9982///
9983/// Rounding is done according to the imm8 parameter, which can be one of:
9984///
9985/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9986/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9987/// * [`_MM_FROUND_TO_POS_INF`] : round up
9988/// * [`_MM_FROUND_TO_ZERO`] : truncate
9989/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9990///
9991/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
    src: __m256h,
    k: __mmask16,
    a: __m256h,
) -> __m256h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Merge-masking intrinsic: `src` and `k` supply the values for lanes
        // whose mask bit is clear.
        vrndscaleph_256(a, IMM8, src, k)
    }
}
10007
10008/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10009/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10010/// mask bit is not set).
10011///
10012/// Rounding is done according to the imm8 parameter, which can be one of:
10013///
10014/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10015/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10016/// * [`_MM_FROUND_TO_POS_INF`] : round up
10017/// * [`_MM_FROUND_TO_ZERO`] : truncate
10018/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10019///
10020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ph)
10021#[inline]
10022#[target_feature(enable = "avx512fp16,avx512vl")]
10023#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10024#[rustc_legacy_const_generics(2)]
10025#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10026pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10027 static_assert_uimm_bits!(IMM8, 8);
10028 _mm256_mask_roundscale_ph::<IMM8>(src:_mm256_setzero_ph(), k, a)
10029}
10030
10031/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10032/// specified by imm8, and store the results in dst.
10033///
10034/// Rounding is done according to the imm8 parameter, which can be one of:
10035///
10036/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10037/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10038/// * [`_MM_FROUND_TO_POS_INF`] : round up
10039/// * [`_MM_FROUND_TO_ZERO`] : truncate
10040/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10041///
10042/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ph)
10043#[inline]
10044#[target_feature(enable = "avx512fp16")]
10045#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10046#[rustc_legacy_const_generics(1)]
10047#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10048pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
10049 static_assert_uimm_bits!(IMM8, 8);
10050 _mm512_mask_roundscale_ph::<IMM8>(src:_mm512_undefined_ph(), k:0xffffffff, a)
10051}
10052
10053/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10054/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10055/// the corresponding mask bit is not set).
10056///
10057/// Rounding is done according to the imm8 parameter, which can be one of:
10058///
10059/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10060/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10061/// * [`_MM_FROUND_TO_POS_INF`] : round up
10062/// * [`_MM_FROUND_TO_ZERO`] : truncate
10063/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10064///
10065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ph)
10066#[inline]
10067#[target_feature(enable = "avx512fp16")]
10068#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10069#[rustc_legacy_const_generics(3)]
10070#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10071pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
10072 src: __m512h,
10073 k: __mmask32,
10074 a: __m512h,
10075) -> __m512h {
10076 static_assert_uimm_bits!(IMM8, 8);
10077 _mm512_mask_roundscale_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
10078}
10079
10080/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10081/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10082/// mask bit is not set).
10083///
10084/// Rounding is done according to the imm8 parameter, which can be one of:
10085///
10086/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10087/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10088/// * [`_MM_FROUND_TO_POS_INF`] : round up
10089/// * [`_MM_FROUND_TO_ZERO`] : truncate
10090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10091///
10092/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ph)
10093#[inline]
10094#[target_feature(enable = "avx512fp16")]
10095#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
10096#[rustc_legacy_const_generics(2)]
10097#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10098pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10099 static_assert_uimm_bits!(IMM8, 8);
10100 _mm512_mask_roundscale_ph::<IMM8>(src:_mm512_setzero_ph(), k, a)
10101}
10102
10103/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10104/// specified by imm8, and store the results in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
10105/// in the sae parameter
10106///
10107/// Rounding is done according to the imm8 parameter, which can be one of:
10108///
10109/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10110/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10111/// * [`_MM_FROUND_TO_POS_INF`] : round up
10112/// * [`_MM_FROUND_TO_ZERO`] : truncate
10113/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10114///
10115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ph)
10116#[inline]
10117#[target_feature(enable = "avx512fp16")]
10118#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10119#[rustc_legacy_const_generics(1, 2)]
10120#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10121pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10122 static_assert_uimm_bits!(IMM8, 8);
10123 static_assert_sae!(SAE);
10124 _mm512_mask_roundscale_round_ph::<IMM8, SAE>(src:_mm512_undefined_ph(), k:0xffffffff, a)
10125}
10126
10127/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10128/// specified by imm8, and store the results in dst using writemask k (elements are copied from src when
10129/// the corresponding mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
10130/// in the sae parameter
10131///
10132/// Rounding is done according to the imm8 parameter, which can be one of:
10133///
10134/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10135/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10136/// * [`_MM_FROUND_TO_POS_INF`] : round up
10137/// * [`_MM_FROUND_TO_ZERO`] : truncate
10138/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10139///
10140/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
    src: __m512h,
    k: __mmask32,
    a: __m512h,
) -> __m512h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        // Merge-masking intrinsic: `src` and `k` supply the values for lanes
        // whose mask bit is clear; `SAE` controls exception suppression.
        vrndscaleph_512(a, IMM8, src, k, SAE)
    }
}
10157
10158/// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits
10159/// specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
10160/// mask bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10161///
10162/// Rounding is done according to the imm8 parameter, which can be one of:
10163///
10164/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10165/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10166/// * [`_MM_FROUND_TO_POS_INF`] : round up
10167/// * [`_MM_FROUND_TO_ZERO`] : truncate
10168/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10169///
10170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ph)
10171#[inline]
10172#[target_feature(enable = "avx512fp16")]
10173#[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
10174#[rustc_legacy_const_generics(2, 3)]
10175#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10176pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
10177 k: __mmask32,
10178 a: __m512h,
10179) -> __m512h {
10180 static_assert_uimm_bits!(IMM8, 8);
10181 static_assert_sae!(SAE);
10182 _mm512_mask_roundscale_round_ph::<IMM8, SAE>(src:_mm512_setzero_ph(), k, a)
10183}
10184
10185/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10186/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10187/// from a to the upper elements of dst.
10188///
10189/// Rounding is done according to the imm8 parameter, which can be one of:
10190///
10191/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10192/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10193/// * [`_MM_FROUND_TO_POS_INF`] : round up
10194/// * [`_MM_FROUND_TO_ZERO`] : truncate
10195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10196///
10197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sh)
10198#[inline]
10199#[target_feature(enable = "avx512fp16")]
10200#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10201#[rustc_legacy_const_generics(2)]
10202#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10203pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10204 static_assert_uimm_bits!(IMM8, 8);
10205 _mm_mask_roundscale_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
10206}
10207
10208/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10209/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10210/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10211///
10212/// Rounding is done according to the imm8 parameter, which can be one of:
10213///
10214/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10215/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10216/// * [`_MM_FROUND_TO_POS_INF`] : round up
10217/// * [`_MM_FROUND_TO_ZERO`] : truncate
10218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10219///
10220/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sh)
10221#[inline]
10222#[target_feature(enable = "avx512fp16")]
10223#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10224#[rustc_legacy_const_generics(4)]
10225#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10226pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
10227 src: __m128h,
10228 k: __mmask8,
10229 a: __m128h,
10230 b: __m128h,
10231) -> __m128h {
10232 static_assert_uimm_bits!(IMM8, 8);
10233 _mm_mask_roundscale_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
10234}
10235
10236/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10237/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10238/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10239///
10240/// Rounding is done according to the imm8 parameter, which can be one of:
10241///
10242/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10243/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10244/// * [`_MM_FROUND_TO_POS_INF`] : round up
10245/// * [`_MM_FROUND_TO_ZERO`] : truncate
10246/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10247///
10248/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sh)
10249#[inline]
10250#[target_feature(enable = "avx512fp16")]
10251#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
10252#[rustc_legacy_const_generics(3)]
10253#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10254pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10255 static_assert_uimm_bits!(IMM8, 8);
10256 _mm_mask_roundscale_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k, a, b)
10257}
10258
10259/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10260/// specified by imm8, store the result in the lower element of dst, and copy the upper 7 packed elements
10261/// from a to the upper elements of dst.
10262///
10263/// Rounding is done according to the imm8 parameter, which can be one of:
10264///
10265/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10266/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10267/// * [`_MM_FROUND_TO_POS_INF`] : round up
10268/// * [`_MM_FROUND_TO_ZERO`] : truncate
10269/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10270///
10271/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10272///
10273/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sh)
10274#[inline]
10275#[target_feature(enable = "avx512fp16")]
10276#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10277#[rustc_legacy_const_generics(2, 3)]
10278#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10279pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
10280 static_assert_uimm_bits!(IMM8, 8);
10281 static_assert_sae!(SAE);
10282 _mm_mask_roundscale_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
10283}
10284
10285/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10286/// specified by imm8, store the result in the lower element of dst using writemask k (the element is copied
10287/// from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10288///
10289/// Rounding is done according to the imm8 parameter, which can be one of:
10290///
10291/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10292/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10293/// * [`_MM_FROUND_TO_POS_INF`] : round up
10294/// * [`_MM_FROUND_TO_ZERO`] : truncate
10295/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10296///
10297/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10298///
10299/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        // Merge-masking intrinsic: `src` and bit 0 of `k` supply the lower
        // lane's fallback value; `SAE` controls exception suppression.
        vrndscalesh(a, b, src, k, IMM8, SAE)
    }
}
10317
10318/// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits
10319/// specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed
10320/// out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper elements of dst.
10321///
10322/// Rounding is done according to the imm8 parameter, which can be one of:
10323///
10324/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10325/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10326/// * [`_MM_FROUND_TO_POS_INF`] : round up
10327/// * [`_MM_FROUND_TO_ZERO`] : truncate
10328/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10329///
10330/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter
10331///
10332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sh)
10333#[inline]
10334#[target_feature(enable = "avx512fp16")]
10335#[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
10336#[rustc_legacy_const_generics(3, 4)]
10337#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10338pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
10339 k: __mmask8,
10340 a: __m128h,
10341 b: __m128h,
10342) -> __m128h {
10343 static_assert_uimm_bits!(IMM8, 8);
10344 static_assert_sae!(SAE);
10345 _mm_mask_roundscale_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
10346}
10347
10348/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10349/// the results in dst.
10350///
10351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ph)
10352#[inline]
10353#[target_feature(enable = "avx512fp16,avx512vl")]
10354#[cfg_attr(test, assert_instr(vscalefph))]
10355#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10356pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
10357 _mm_mask_scalef_ph(src:_mm_undefined_ph(), k:0xff, a, b)
10358}
10359
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // SAFETY: `avx512fp16,avx512vl` are enabled on this fn via `#[target_feature]`.
    // The merge-masking (src/k) semantics are handled by the intrinsic itself.
    unsafe { vscalefph_128(a, b, src, k) }
}
10371
10372/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10373/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10374///
10375/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ph)
10376#[inline]
10377#[target_feature(enable = "avx512fp16,avx512vl")]
10378#[cfg_attr(test, assert_instr(vscalefph))]
10379#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10380pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10381 _mm_mask_scalef_ph(src:_mm_setzero_ph(), k, a, b)
10382}
10383
10384/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10385/// the results in dst.
10386///
10387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_scalef_ph)
10388#[inline]
10389#[target_feature(enable = "avx512fp16,avx512vl")]
10390#[cfg_attr(test, assert_instr(vscalefph))]
10391#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10392pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
10393 _mm256_mask_scalef_ph(src:_mm256_undefined_ph(), k:0xffff, a, b)
10394}
10395
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
    // SAFETY: `avx512fp16,avx512vl` are enabled on this fn via `#[target_feature]`.
    // The merge-masking (src/k) semantics are handled by the intrinsic itself.
    unsafe { vscalefph_256(a, b, src, k) }
}
10407
10408/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10409/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10410///
10411/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_scalef_ph)
10412#[inline]
10413#[target_feature(enable = "avx512fp16,avx512vl")]
10414#[cfg_attr(test, assert_instr(vscalefph))]
10415#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10416pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
10417 _mm256_mask_scalef_ph(src:_mm256_setzero_ph(), k, a, b)
10418}
10419
10420/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10421/// the results in dst.
10422///
10423/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ph)
10424#[inline]
10425#[target_feature(enable = "avx512fp16")]
10426#[cfg_attr(test, assert_instr(vscalefph))]
10427#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10428pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
10429 _mm512_mask_scalef_ph(src:_mm512_undefined_ph(), k:0xffffffff, a, b)
10430}
10431
/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
    // Delegates to the round variant using the current MXCSR rounding direction.
    _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
10443
10444/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10445/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10446///
10447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ph)
10448#[inline]
10449#[target_feature(enable = "avx512fp16")]
10450#[cfg_attr(test, assert_instr(vscalefph))]
10451#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10452pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
10453 _mm512_mask_scalef_ph(src:_mm512_setzero_ph(), k, a, b)
10454}
10455
10456/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10457/// the results in dst.
10458///
10459/// Rounding is done according to the rounding parameter, which can be one of:
10460///
10461/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10462/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10463/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10464/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10465/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10466///
10467/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ph)
10468#[inline]
10469#[target_feature(enable = "avx512fp16")]
10470#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10471#[rustc_legacy_const_generics(2)]
10472#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10473pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
10474 static_assert_rounding!(ROUNDING);
10475 _mm512_mask_scalef_round_ph::<ROUNDING>(src:_mm512_undefined_ph(), k:0xffffffff, a, b)
10476}
10477
10478/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10479/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10480///
10481/// Rounding is done according to the rounding parameter, which can be one of:
10482///
10483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10488///
10489/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ph)
10490#[inline]
10491#[target_feature(enable = "avx512fp16")]
10492#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10493#[rustc_legacy_const_generics(4)]
10494#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10495pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
10496 src: __m512h,
10497 k: __mmask32,
10498 a: __m512h,
10499 b: __m512h,
10500) -> __m512h {
10501 unsafe {
10502 static_assert_rounding!(ROUNDING);
10503 vscalefph_512(a, b, src, k, ROUNDING)
10504 }
10505}
10506
10507/// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store
10508/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10509///
10510/// Rounding is done according to the rounding parameter, which can be one of:
10511///
10512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10517///
10518/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ph)
10519#[inline]
10520#[target_feature(enable = "avx512fp16")]
10521#[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
10522#[rustc_legacy_const_generics(3)]
10523#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10524pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
10525 k: __mmask32,
10526 a: __m512h,
10527 b: __m512h,
10528) -> __m512h {
10529 static_assert_rounding!(ROUNDING);
10530 _mm512_mask_scalef_round_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a, b)
10531}
10532
10533/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10534/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10535/// elements of dst.
10536///
10537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sh)
10538#[inline]
10539#[target_feature(enable = "avx512fp16")]
10540#[cfg_attr(test, assert_instr(vscalefsh))]
10541#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10542pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
10543 _mm_mask_scalef_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
10544}
10545
/// Scale the lower half-precision (16-bit) floating-point element in a using the value from b, store
/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
/// and copy the upper 7 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vscalefsh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Delegates to the round variant using the current MXCSR rounding direction.
    _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
10558
10559/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10560/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10561/// and copy the upper 7 packed elements from a to the upper elements of dst.
10562///
10563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sh)
10564#[inline]
10565#[target_feature(enable = "avx512fp16")]
10566#[cfg_attr(test, assert_instr(vscalefsh))]
10567#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10568pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
10569 _mm_mask_scalef_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
10570}
10571
10572/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10573/// the result in the lower element of dst, and copy the upper 7 packed elements from a to the upper
10574/// elements of dst.
10575///
10576/// Rounding is done according to the rounding parameter, which can be one of:
10577///
10578/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10579/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10580/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10581/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10582/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10583///
10584/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sh)
10585#[inline]
10586#[target_feature(enable = "avx512fp16")]
10587#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10588#[rustc_legacy_const_generics(2)]
10589#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10590pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
10591 static_assert_rounding!(ROUNDING);
10592 _mm_mask_scalef_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
10593}
10594
10595/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10596/// the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set),
10597/// and copy the upper 7 packed elements from a to the upper elements of dst.
10598///
10599/// Rounding is done according to the rounding parameter, which can be one of:
10600///
10601/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10602/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10603/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10604/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10605/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10606///
10607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sh)
10608#[inline]
10609#[target_feature(enable = "avx512fp16")]
10610#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10611#[rustc_legacy_const_generics(4)]
10612#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10613pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
10614 src: __m128h,
10615 k: __mmask8,
10616 a: __m128h,
10617 b: __m128h,
10618) -> __m128h {
10619 unsafe {
10620 static_assert_rounding!(ROUNDING);
10621 vscalefsh(a, b, src, k, ROUNDING)
10622 }
10623}
10624
10625/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store
10626/// the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set),
10627/// and copy the upper 7 packed elements from a to the upper elements of dst.
10628///
10629/// Rounding is done according to the rounding parameter, which can be one of:
10630///
10631/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10632/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10633/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10634/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10635/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10636///
10637/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sh)
10638#[inline]
10639#[target_feature(enable = "avx512fp16")]
10640#[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
10641#[rustc_legacy_const_generics(3)]
10642#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10643pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
10644 k: __mmask8,
10645 a: __m128h,
10646 b: __m128h,
10647) -> __m128h {
10648 static_assert_rounding!(ROUNDING);
10649 _mm_mask_scalef_round_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
10650}
10651
10652/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10653/// number of bits specified by imm8, and store the results in dst.
10654///
10655/// Rounding is done according to the imm8 parameter, which can be one of:
10656///
10657/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10658/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10659/// * [`_MM_FROUND_TO_POS_INF`] : round up
10660/// * [`_MM_FROUND_TO_ZERO`] : truncate
10661/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10662///
10663/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_ph)
10664#[inline]
10665#[target_feature(enable = "avx512fp16,avx512vl")]
10666#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10667#[rustc_legacy_const_generics(1)]
10668#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10669pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
10670 static_assert_uimm_bits!(IMM8, 8);
10671 _mm_mask_reduce_ph::<IMM8>(src:_mm_undefined_ph(), k:0xff, a)
10672}
10673
10674/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10675/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10676/// from src when the corresponding mask bit is not set).
10677///
10678/// Rounding is done according to the imm8 parameter, which can be one of:
10679///
10680/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10681/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10682/// * [`_MM_FROUND_TO_POS_INF`] : round up
10683/// * [`_MM_FROUND_TO_ZERO`] : truncate
10684/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10685///
10686/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_ph)
10687#[inline]
10688#[target_feature(enable = "avx512fp16,avx512vl")]
10689#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10690#[rustc_legacy_const_generics(3)]
10691#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10692pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
10693 unsafe {
10694 static_assert_uimm_bits!(IMM8, 8);
10695 vreduceph_128(a, IMM8, src, k)
10696 }
10697}
10698
10699/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10700/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10701/// out when the corresponding mask bit is not set).
10702///
10703/// Rounding is done according to the imm8 parameter, which can be one of:
10704///
10705/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10706/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10707/// * [`_MM_FROUND_TO_POS_INF`] : round up
10708/// * [`_MM_FROUND_TO_ZERO`] : truncate
10709/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10710///
10711/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_ph)
10712#[inline]
10713#[target_feature(enable = "avx512fp16,avx512vl")]
10714#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10715#[rustc_legacy_const_generics(2)]
10716#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10717pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
10718 static_assert_uimm_bits!(IMM8, 8);
10719 _mm_mask_reduce_ph::<IMM8>(src:_mm_setzero_ph(), k, a)
10720}
10721
10722/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10723/// number of bits specified by imm8, and store the results in dst.
10724///
10725/// Rounding is done according to the imm8 parameter, which can be one of:
10726///
10727/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10728/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10729/// * [`_MM_FROUND_TO_POS_INF`] : round up
10730/// * [`_MM_FROUND_TO_ZERO`] : truncate
10731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10732///
10733/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_ph)
10734#[inline]
10735#[target_feature(enable = "avx512fp16,avx512vl")]
10736#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10737#[rustc_legacy_const_generics(1)]
10738#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10739pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
10740 static_assert_uimm_bits!(IMM8, 8);
10741 _mm256_mask_reduce_ph::<IMM8>(src:_mm256_undefined_ph(), k:0xffff, a)
10742}
10743
10744/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10745/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10746/// from src when the corresponding mask bit is not set).
10747///
10748/// Rounding is done according to the imm8 parameter, which can be one of:
10749///
10750/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10751/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10752/// * [`_MM_FROUND_TO_POS_INF`] : round up
10753/// * [`_MM_FROUND_TO_ZERO`] : truncate
10754/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10755///
10756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_reduce_ph)
10757#[inline]
10758#[target_feature(enable = "avx512fp16,avx512vl")]
10759#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10760#[rustc_legacy_const_generics(3)]
10761#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10762pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
10763 unsafe {
10764 static_assert_uimm_bits!(IMM8, 8);
10765 vreduceph_256(a, IMM8, src, k)
10766 }
10767}
10768
10769/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10770/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10771/// out when the corresponding mask bit is not set).
10772///
10773/// Rounding is done according to the imm8 parameter, which can be one of:
10774///
10775/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10776/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10777/// * [`_MM_FROUND_TO_POS_INF`] : round up
10778/// * [`_MM_FROUND_TO_ZERO`] : truncate
10779/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10780///
10781/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_reduce_ph)
10782#[inline]
10783#[target_feature(enable = "avx512fp16,avx512vl")]
10784#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10785#[rustc_legacy_const_generics(2)]
10786#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10787pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
10788 static_assert_uimm_bits!(IMM8, 8);
10789 _mm256_mask_reduce_ph::<IMM8>(src:_mm256_setzero_ph(), k, a)
10790}
10791
10792/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10793/// number of bits specified by imm8, and store the results in dst.
10794///
10795/// Rounding is done according to the imm8 parameter, which can be one of:
10796///
10797/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10798/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10799/// * [`_MM_FROUND_TO_POS_INF`] : round up
10800/// * [`_MM_FROUND_TO_ZERO`] : truncate
10801/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10802///
10803/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_ph)
10804#[inline]
10805#[target_feature(enable = "avx512fp16")]
10806#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10807#[rustc_legacy_const_generics(1)]
10808#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10809pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
10810 static_assert_uimm_bits!(IMM8, 8);
10811 _mm512_mask_reduce_ph::<IMM8>(src:_mm512_undefined_ph(), k:0xffffffff, a)
10812}
10813
/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
    static_assert_uimm_bits!(IMM8, 8);
    // Delegates to the round variant using the current MXCSR rounding direction.
    _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
}
10836
10837/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10838/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10839/// out when the corresponding mask bit is not set).
10840///
10841/// Rounding is done according to the imm8 parameter, which can be one of:
10842///
10843/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10844/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10845/// * [`_MM_FROUND_TO_POS_INF`] : round up
10846/// * [`_MM_FROUND_TO_ZERO`] : truncate
10847/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10848///
10849/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_ph)
10850#[inline]
10851#[target_feature(enable = "avx512fp16")]
10852#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
10853#[rustc_legacy_const_generics(2)]
10854#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10855pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
10856 static_assert_uimm_bits!(IMM8, 8);
10857 _mm512_mask_reduce_ph::<IMM8>(src:_mm512_setzero_ph(), k, a)
10858}
10859
10860/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10861/// number of bits specified by imm8, and store the results in dst.
10862///
10863/// Rounding is done according to the imm8 parameter, which can be one of:
10864///
10865/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10866/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10867/// * [`_MM_FROUND_TO_POS_INF`] : round up
10868/// * [`_MM_FROUND_TO_ZERO`] : truncate
10869/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10870///
10871/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10872///
10873/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_round_ph)
10874#[inline]
10875#[target_feature(enable = "avx512fp16")]
10876#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10877#[rustc_legacy_const_generics(1, 2)]
10878#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10879pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
10880 static_assert_uimm_bits!(IMM8, 8);
10881 static_assert_sae!(SAE);
10882 _mm512_mask_reduce_round_ph::<IMM8, SAE>(src:_mm512_undefined_ph(), k:0xffffffff, a)
10883}
10884
10885/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10886/// number of bits specified by imm8, and store the results in dst using writemask k (elements are copied
10887/// from src when the corresponding mask bit is not set).
10888///
10889/// Rounding is done according to the imm8 parameter, which can be one of:
10890///
10891/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10892/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10893/// * [`_MM_FROUND_TO_POS_INF`] : round up
10894/// * [`_MM_FROUND_TO_ZERO`] : truncate
10895/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10896///
10897/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10898///
10899/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_round_ph)
10900#[inline]
10901#[target_feature(enable = "avx512fp16")]
10902#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10903#[rustc_legacy_const_generics(3, 4)]
10904#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10905pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10906 src: __m512h,
10907 k: __mmask32,
10908 a: __m512h,
10909) -> __m512h {
10910 unsafe {
10911 static_assert_uimm_bits!(IMM8, 8);
10912 static_assert_sae!(SAE);
10913 vreduceph_512(a, IMM8, src, k, SAE)
10914 }
10915}
10916
10917/// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the
10918/// number of bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed
10919/// out when the corresponding mask bit is not set).
10920///
10921/// Rounding is done according to the imm8 parameter, which can be one of:
10922///
10923/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10924/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10925/// * [`_MM_FROUND_TO_POS_INF`] : round up
10926/// * [`_MM_FROUND_TO_ZERO`] : truncate
10927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10928///
10929/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10930///
10931/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_reduce_round_ph)
10932#[inline]
10933#[target_feature(enable = "avx512fp16")]
10934#[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
10935#[rustc_legacy_const_generics(2, 3)]
10936#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10937pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
10938 k: __mmask32,
10939 a: __m512h,
10940) -> __m512h {
10941 static_assert_uimm_bits!(IMM8, 8);
10942 static_assert_sae!(SAE);
10943 _mm512_mask_reduce_round_ph::<IMM8, SAE>(src:_mm512_setzero_ph(), k, a)
10944}
10945
10946/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10947/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the
10948/// upper 7 packed elements from a to the upper elements of dst.
10949///
10950/// Rounding is done according to the imm8 parameter, which can be one of:
10951///
10952/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10953/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10954/// * [`_MM_FROUND_TO_POS_INF`] : round up
10955/// * [`_MM_FROUND_TO_ZERO`] : truncate
10956/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10957///
10958/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_sh)
10959#[inline]
10960#[target_feature(enable = "avx512fp16")]
10961#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
10962#[rustc_legacy_const_generics(2)]
10963#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10964pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
10965 static_assert_uimm_bits!(IMM8, 8);
10966 _mm_mask_reduce_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
10967}
10968
10969/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10970/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
10971/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from
10972/// a to the upper elements of dst.
10973///
10974/// Rounding is done according to the imm8 parameter, which can be one of:
10975///
10976/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10977/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10978/// * [`_MM_FROUND_TO_POS_INF`] : round up
10979/// * [`_MM_FROUND_TO_ZERO`] : truncate
10980/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10981///
10982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_reduce_sh<const IMM8: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    static_assert_uimm_bits!(IMM8, 8);
    // Delegate to the rounding variant using the current MXCSR rounding mode.
    _mm_mask_reduce_round_sh::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a, b)
}
10997
10998/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
10999/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
11000/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
11001/// to the upper elements of dst.
11002///
11003/// Rounding is done according to the imm8 parameter, which can be one of:
11004///
11005/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11006/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11007/// * [`_MM_FROUND_TO_POS_INF`] : round up
11008/// * [`_MM_FROUND_TO_ZERO`] : truncate
11009/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11010///
11011/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_sh)
11012#[inline]
11013#[target_feature(enable = "avx512fp16")]
11014#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
11015#[rustc_legacy_const_generics(3)]
11016#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11017pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
11018 static_assert_uimm_bits!(IMM8, 8);
11019 _mm_mask_reduce_sh::<IMM8>(src:f16x8::ZERO.as_m128h(), k, a, b)
11020}
11021
11022/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11023/// the number of bits specified by imm8, store the result in the lower element of dst, and copy the upper
11024/// 7 packed elements from a to the upper elements of dst.
11025///
11026/// Rounding is done according to the imm8 parameter, which can be one of:
11027///
11028/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11029/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11030/// * [`_MM_FROUND_TO_POS_INF`] : round up
11031/// * [`_MM_FROUND_TO_ZERO`] : truncate
11032/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11033///
11034/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11035///
11036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_round_sh)
11037#[inline]
11038#[target_feature(enable = "avx512fp16")]
11039#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
11040#[rustc_legacy_const_generics(2, 3)]
11041#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11042pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
11043 static_assert_uimm_bits!(IMM8, 8);
11044 static_assert_sae!(SAE);
11045 _mm_mask_reduce_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
11046}
11047
11048/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11049/// the number of bits specified by imm8, store the result in the lower element of dst using writemask k
11050/// (the element is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a
11051/// to the upper elements of dst.
11052///
11053/// Rounding is done according to the imm8 parameter, which can be one of:
11054///
11055/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11056/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11057/// * [`_MM_FROUND_TO_POS_INF`] : round up
11058/// * [`_MM_FROUND_TO_ZERO`] : truncate
11059/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11060///
11061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11062///
11063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_reduce_round_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128h,
) -> __m128h {
    unsafe {
        // IMM8 must fit in 8 bits; SAE must be a valid suppress-all-exceptions value.
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        // The intrinsic applies the writemask itself: a clear mask bit 0 selects `src`.
        vreducesh(a, b, src, k, IMM8, SAE)
    }
}
11081
11082/// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by
11083/// the number of bits specified by imm8, store the result in the lower element of dst using zeromask k
11084/// (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a
11085/// to the upper elements of dst.
11086///
11087/// Rounding is done according to the imm8 parameter, which can be one of:
11088///
11089/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
11090/// * [`_MM_FROUND_TO_NEG_INF`] : round down
11091/// * [`_MM_FROUND_TO_POS_INF`] : round up
11092/// * [`_MM_FROUND_TO_ZERO`] : truncate
11093/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11094///
11095/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11096///
11097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_reduce_round_sh)
11098#[inline]
11099#[target_feature(enable = "avx512fp16")]
11100#[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
11101#[rustc_legacy_const_generics(3, 4)]
11102#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11103pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
11104 k: __mmask8,
11105 a: __m128h,
11106 b: __m128h,
11107) -> __m128h {
11108 static_assert_uimm_bits!(IMM8, 8);
11109 static_assert_sae!(SAE);
11110 _mm_mask_reduce_round_sh::<IMM8, SAE>(src:f16x8::ZERO.as_m128h(), k, a, b)
11111}
11112
11113/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11114/// sum of all elements in a.
11115///
11116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_add_ph)
11117#[inline]
11118#[target_feature(enable = "avx512fp16,avx512vl")]
11119#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11120pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
11121 unsafe {
11122 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11123 let a: __m128h = _mm_add_ph(a, b);
11124 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11125 let a: __m128h = _mm_add_ph(a, b);
11126 simd_extract!(a, 0, f16) + simd_extract!(a, 1, f16)
11127 }
11128}
11129
11130/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11131/// sum of all elements in a.
11132///
11133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_add_ph)
11134#[inline]
11135#[target_feature(enable = "avx512fp16,avx512vl")]
11136#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11137pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
11138 unsafe {
11139 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11140 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
11141 _mm_reduce_add_ph(_mm_add_ph(a:p, b:q))
11142 }
11143}
11144
11145/// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the
11146/// sum of all elements in a.
11147///
11148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ph)
11149#[inline]
11150#[target_feature(enable = "avx512fp16")]
11151#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11152pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
11153 unsafe {
11154 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11155 let q: __m256h = simd_shuffle!(
11156 a,
11157 a,
11158 [
11159 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11160 ]
11161 );
11162 _mm256_reduce_add_ph(_mm256_add_ph(a:p, b:q))
11163 }
11164}
11165
11166/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11167/// the product of all elements in a.
11168///
11169/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_mul_ph)
11170#[inline]
11171#[target_feature(enable = "avx512fp16,avx512vl")]
11172#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11173pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
11174 unsafe {
11175 let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
11176 let a: __m128h = _mm_mul_ph(a, b);
11177 let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
11178 let a: __m128h = _mm_mul_ph(a, b);
11179 simd_extract!(a, 0, f16) * simd_extract!(a, 1, f16)
11180 }
11181}
11182
11183/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11184/// the product of all elements in a.
11185///
11186/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_mul_ph)
11187#[inline]
11188#[target_feature(enable = "avx512fp16,avx512vl")]
11189#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11190pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
11191 unsafe {
11192 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11193 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
11194 _mm_reduce_mul_ph(_mm_mul_ph(a:p, b:q))
11195 }
11196}
11197
11198/// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns
11199/// the product of all elements in a.
11200///
11201/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ph)
11202#[inline]
11203#[target_feature(enable = "avx512fp16")]
11204#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11205pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
11206 unsafe {
11207 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11208 let q: __m256h = simd_shuffle!(
11209 a,
11210 a,
11211 [
11212 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11213 ]
11214 );
11215 _mm256_reduce_mul_ph(_mm256_mul_ph(a:p, b:q))
11216 }
11217}
11218
11219/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11220/// minimum of all elements in a.
11221///
11222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_min_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_min_ph(a: __m128h) -> f16 {
    unsafe {
        // Tree reduction: min of halves, then of adjacent pairs, then the
        // final two lanes are combined with the scalar min and extracted.
        let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
        let a: __m128h = _mm_min_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a: __m128h = _mm_min_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
        simd_extract!(_mm_min_sh(a, b), 0)
    }
}
11236
11237/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11238/// minimum of all elements in a.
11239///
11240/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_min_ph)
11241#[inline]
11242#[target_feature(enable = "avx512fp16,avx512vl")]
11243#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11244pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 {
11245 unsafe {
11246 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11247 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
11248 _mm_reduce_min_ph(_mm_min_ph(a:p, b:q))
11249 }
11250}
11251
11252/// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the
11253/// minimum of all elements in a.
11254///
11255/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ph)
11256#[inline]
11257#[target_feature(enable = "avx512fp16")]
11258#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11259pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 {
11260 unsafe {
11261 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11262 let q: __m256h = simd_shuffle!(
11263 a,
11264 a,
11265 [
11266 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11267 ]
11268 );
11269 _mm256_reduce_min_ph(_mm256_min_ph(a:p, b:q))
11270 }
11271}
11272
11273/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11274/// maximum of all elements in a.
11275///
11276/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_reduce_max_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_reduce_max_ph(a: __m128h) -> f16 {
    unsafe {
        // Tree reduction: max of halves, then of adjacent pairs, then the
        // final two lanes are combined with the scalar max and extracted.
        let b: __m128h = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
        let a: __m128h = _mm_max_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]);
        let a: __m128h = _mm_max_ph(a, b);
        let b: __m128h = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]);
        simd_extract!(_mm_max_sh(a, b), 0)
    }
}
11290
11291/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11292/// maximum of all elements in a.
11293///
11294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_reduce_max_ph)
11295#[inline]
11296#[target_feature(enable = "avx512fp16,avx512vl")]
11297#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11298pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 {
11299 unsafe {
11300 let p: __m128h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11301 let q: __m128h = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
11302 _mm_reduce_max_ph(_mm_max_ph(a:p, b:q))
11303 }
11304}
11305
11306/// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the
11307/// maximum of all elements in a.
11308///
11309/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ph)
11310#[inline]
11311#[target_feature(enable = "avx512fp16")]
11312#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11313pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
11314 unsafe {
11315 let p: __m256h = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
11316 let q: __m256h = simd_shuffle!(
11317 a,
11318 a,
11319 [
11320 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
11321 ]
11322 );
11323 _mm256_reduce_max_ph(_mm256_max_ph(a:p, b:q))
11324 }
11325}
11326
// Emits a `vfpclassph` classification via inline assembly; `IMM8` is taken
// from the caller's const-generic scope.
macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
    // Unmasked arm: classify every lane of `$a` into a fresh mask register.
    ($mask_type: ty, $reg: ident, $a: expr) => {{
        let dst: $mask_type;
        asm!(
            "vfpclassph {k}, {src}, {imm8}",
            k = lateout(kreg) dst,
            src = in($reg) $a,
            imm8 = const IMM8,
            // pure + nomem: the result depends only on register inputs.
            options(pure, nomem, nostack)
        );
        dst
    }};
    // Masked arm: `$mask` is applied as a zeroing writemask on the result.
    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
        let dst: $mask_type;
        asm!(
            "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
            k = lateout(kreg) dst,
            mask = in(kreg) $mask,
            src = in($reg) $a,
            imm8 = const IMM8,
            options(pure, nomem, nostack)
        );
        dst
    }};
}
11352
11353/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11354/// by imm8, and store the results in mask vector k.
11355/// imm can be a combination of:
11356///
11357/// 0x01 // QNaN
11358/// 0x02 // Positive Zero
11359/// 0x04 // Negative Zero
11360/// 0x08 // Positive Infinity
11361/// 0x10 // Negative Infinity
11362/// 0x20 // Denormal
11363/// 0x40 // Negative
11364/// 0x80 // SNaN
11365///
11366/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // One mask bit per f16 lane: 8 lanes in a 128-bit vector.
        fpclass_asm!(__mmask8, xmm_reg, a)
    }
}
11378
11379/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11380/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11381/// corresponding mask bit is not set).
11382/// imm can be a combination of:
11383///
11384/// 0x01 // QNaN
11385/// 0x02 // Positive Zero
11386/// 0x04 // Negative Zero
11387/// 0x08 // Positive Infinity
11388/// 0x10 // Negative Infinity
11389/// 0x20 // Denormal
11390/// 0x40 // Negative
11391/// 0x80 // SNaN
11392///
11393/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Masked macro arm: bits cleared in `k1` are zeroed in the result.
        fpclass_asm!(__mmask8, k1, xmm_reg, a)
    }
}
11405
11406/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11407/// by imm8, and store the results in mask vector k.
11408/// imm can be a combination of:
11409///
11410/// 0x01 // QNaN
11411/// 0x02 // Positive Zero
11412/// 0x04 // Negative Zero
11413/// 0x08 // Positive Infinity
11414/// 0x10 // Negative Infinity
11415/// 0x20 // Denormal
11416/// 0x40 // Negative
11417/// 0x80 // SNaN
11418///
11419/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // One mask bit per f16 lane: 16 lanes in a 256-bit vector.
        fpclass_asm!(__mmask16, ymm_reg, a)
    }
}
11431
11432/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11433/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11434/// corresponding mask bit is not set).
11435/// imm can be a combination of:
11436///
11437/// 0x01 // QNaN
11438/// 0x02 // Positive Zero
11439/// 0x04 // Negative Zero
11440/// 0x08 // Positive Infinity
11441/// 0x10 // Negative Infinity
11442/// 0x20 // Denormal
11443/// 0x40 // Negative
11444/// 0x80 // SNaN
11445///
11446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Masked macro arm: bits cleared in `k1` are zeroed in the result.
        fpclass_asm!(__mmask16, k1, ymm_reg, a)
    }
}
11458
11459/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11460/// by imm8, and store the results in mask vector k.
11461/// imm can be a combination of:
11462///
11463/// 0x01 // QNaN
11464/// 0x02 // Positive Zero
11465/// 0x04 // Negative Zero
11466/// 0x08 // Positive Infinity
11467/// 0x10 // Negative Infinity
11468/// 0x20 // Denormal
11469/// 0x40 // Negative
11470/// 0x80 // SNaN
11471///
11472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // One mask bit per f16 lane: 32 lanes in a 512-bit vector.
        fpclass_asm!(__mmask32, zmm_reg, a)
    }
}
11484
11485/// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
11486/// by imm8, and store the results in mask vector k using zeromask k (elements are zeroed out when the
11487/// corresponding mask bit is not set).
11488/// imm can be a combination of:
11489///
11490/// 0x01 // QNaN
11491/// 0x02 // Positive Zero
11492/// 0x04 // Negative Zero
11493/// 0x08 // Positive Infinity
11494/// 0x10 // Negative Infinity
11495/// 0x20 // Denormal
11496/// 0x40 // Negative
11497/// 0x80 // SNaN
11498///
11499/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fpclass_ph_mask)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Masked macro arm: bits cleared in `k1` are zeroed in the result.
        fpclass_asm!(__mmask32, k1, zmm_reg, a)
    }
}
11511
11512/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11513/// by imm8, and store the result in mask vector k.
11514/// imm can be a combination of:
11515///
11516/// 0x01 // QNaN
11517/// 0x02 // Positive Zero
11518/// 0x04 // Negative Zero
11519/// 0x08 // Positive Infinity
11520/// 0x10 // Negative Infinity
11521/// 0x20 // Denormal
11522/// 0x40 // Negative
11523/// 0x80 // SNaN
11524///
11525/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fpclass_sh_mask)
11526#[inline]
11527#[target_feature(enable = "avx512fp16")]
11528#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
11529#[rustc_legacy_const_generics(1)]
11530#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11531pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
11532 _mm_mask_fpclass_sh_mask::<IMM8>(k1:0xff, a)
11533}
11534
11535/// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
11536/// by imm8, and store the result in mask vector k using zeromask k (elements are zeroed out when the
11537/// corresponding mask bit is not set).
11538/// imm can be a combination of:
11539///
11540/// 0x01 // QNaN
11541/// 0x02 // Positive Zero
11542/// 0x04 // Negative Zero
11543/// 0x08 // Positive Infinity
11544/// 0x10 // Negative Infinity
11545/// 0x20 // Denormal
11546/// 0x40 // Negative
11547/// 0x80 // SNaN
11548///
11549/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fpclass_sh_mask)
11550#[inline]
11551#[target_feature(enable = "avx512fp16")]
11552#[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
11553#[rustc_legacy_const_generics(2)]
11554#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11555pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
11556 unsafe {
11557 static_assert_uimm_bits!(IMM8, 8);
11558 vfpclasssh(a, IMM8, k:k1)
11559 }
11560}
11561
11562/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11563/// and store the results in dst.
11564///
11565/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
11566#[inline]
11567#[target_feature(enable = "avx512fp16,avx512vl")]
11568#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11569pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
11570 unsafe { simd_select_bitmask(m:k, yes:b, no:a) }
11571}
11572
11573/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11574/// and store the results in dst.
11575///
11576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
11577#[inline]
11578#[target_feature(enable = "avx512fp16,avx512vl")]
11579#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11580pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
11581 unsafe { simd_select_bitmask(m:k, yes:b, no:a) }
11582}
11583
11584/// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k,
11585/// and store the results in dst.
11586///
11587/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
11588#[inline]
11589#[target_feature(enable = "avx512fp16")]
11590#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11591pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
11592 unsafe { simd_select_bitmask(m:k, yes:b, no:a) }
11593}
11594
11595/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11596/// and index in idx, and store the results in dst.
11597///
11598/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
11599#[inline]
11600#[target_feature(enable = "avx512fp16,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11602pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
11603 _mm_castsi128_ph(_mm_permutex2var_epi16(
11604 a:_mm_castph_si128(a),
11605 idx,
11606 b:_mm_castph_si128(b),
11607 ))
11608}
11609
11610/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11611/// and index in idx, and store the results in dst.
11612///
11613/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
11614#[inline]
11615#[target_feature(enable = "avx512fp16,avx512vl")]
11616#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11617pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
11618 _mm256_castsi256_ph(_mm256_permutex2var_epi16(
11619 a:_mm256_castph_si256(a),
11620 idx,
11621 b:_mm256_castph_si256(b),
11622 ))
11623}
11624
11625/// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector
11626/// and index in idx, and store the results in dst.
11627///
11628/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
11629#[inline]
11630#[target_feature(enable = "avx512fp16")]
11631#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11632pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
11633 _mm512_castsi512_ph(_mm512_permutex2var_epi16(
11634 a:_mm512_castph_si512(a),
11635 idx,
11636 b:_mm512_castph_si512(b),
11637 ))
11638}
11639
11640/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11641/// and store the results in dst.
11642///
11643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
11644#[inline]
11645#[target_feature(enable = "avx512fp16,avx512vl")]
11646#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11647pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
11648 _mm_castsi128_ph(_mm_permutexvar_epi16(idx, a:_mm_castph_si128(a)))
11649}
11650
11651/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11652/// and store the results in dst.
11653///
11654/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
11655#[inline]
11656#[target_feature(enable = "avx512fp16,avx512vl")]
11657#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11658pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
11659 _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, a:_mm256_castph_si256(a)))
11660}
11661
11662/// Shuffle half-precision (16-bit) floating-point elements in a using the corresponding index in idx,
11663/// and store the results in dst.
11664///
11665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
11666#[inline]
11667#[target_feature(enable = "avx512fp16")]
11668#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11669pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
11670 _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, a:_mm512_castph_si512(a)))
11671}
11672
11673/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11674/// and store the results in dst.
11675///
11676/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_ph)
11677#[inline]
11678#[target_feature(enable = "avx512fp16,avx512vl")]
11679#[cfg_attr(test, assert_instr(vcvtw2ph))]
11680#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11681pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
11682 unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) }
11683}
11684
11685/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11686/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11687/// mask bit is not set).
11688///
11689/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_ph)
11690#[inline]
11691#[target_feature(enable = "avx512fp16,avx512vl")]
11692#[cfg_attr(test, assert_instr(vcvtw2ph))]
11693#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11694pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11695 unsafe { simd_select_bitmask(m:k, yes:_mm_cvtepi16_ph(a), no:src) }
11696}
11697
11698/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11699/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11700///
11701/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_ph)
11702#[inline]
11703#[target_feature(enable = "avx512fp16,avx512vl")]
11704#[cfg_attr(test, assert_instr(vcvtw2ph))]
11705#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11706pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
11707 _mm_mask_cvtepi16_ph(src:_mm_setzero_ph(), k, a)
11708}
11709
11710/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11711/// and store the results in dst.
11712///
11713/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_ph)
11714#[inline]
11715#[target_feature(enable = "avx512fp16,avx512vl")]
11716#[cfg_attr(test, assert_instr(vcvtw2ph))]
11717#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11718pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
11719 unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) }
11720}
11721
11722/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11723/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11724/// mask bit is not set).
11725///
11726/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_ph)
11727#[inline]
11728#[target_feature(enable = "avx512fp16,avx512vl")]
11729#[cfg_attr(test, assert_instr(vcvtw2ph))]
11730#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11731pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
11732 unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepi16_ph(a), no:src) }
11733}
11734
11735/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11736/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11737///
11738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_ph)
11739#[inline]
11740#[target_feature(enable = "avx512fp16,avx512vl")]
11741#[cfg_attr(test, assert_instr(vcvtw2ph))]
11742#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11743pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
11744 _mm256_mask_cvtepi16_ph(src:_mm256_setzero_ph(), k, a)
11745}
11746
11747/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11748/// and store the results in dst.
11749///
11750/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_ph)
11751#[inline]
11752#[target_feature(enable = "avx512fp16")]
11753#[cfg_attr(test, assert_instr(vcvtw2ph))]
11754#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11755pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
11756 unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) }
11757}
11758
11759/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11760/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11761/// mask bit is not set).
11762///
11763/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_ph)
11764#[inline]
11765#[target_feature(enable = "avx512fp16")]
11766#[cfg_attr(test, assert_instr(vcvtw2ph))]
11767#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11768pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
11769 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepi16_ph(a), no:src) }
11770}
11771
11772/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11773/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11774///
11775/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_ph)
11776#[inline]
11777#[target_feature(enable = "avx512fp16")]
11778#[cfg_attr(test, assert_instr(vcvtw2ph))]
11779#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11780pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
11781 _mm512_mask_cvtepi16_ph(src:_mm512_setzero_ph(), k, a)
11782}
11783
11784/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11785/// and store the results in dst.
11786///
11787/// Rounding is done according to the rounding parameter, which can be one of:
11788///
11789/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11790/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11791/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11792/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11793/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11794///
11795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi16_ph)
11796#[inline]
11797#[target_feature(enable = "avx512fp16")]
11798#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11799#[rustc_legacy_const_generics(1)]
11800#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11801pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11802 unsafe {
11803 static_assert_rounding!(ROUNDING);
11804 vcvtw2ph_512(a.as_i16x32(), ROUNDING)
11805 }
11806}
11807
11808/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11809/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11810/// mask bit is not set).
11811///
11812/// Rounding is done according to the rounding parameter, which can be one of:
11813///
11814/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11815/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11816/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11817/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11818/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11819///
11820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi16_ph)
11821#[inline]
11822#[target_feature(enable = "avx512fp16")]
11823#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11824#[rustc_legacy_const_generics(3)]
11825#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11826pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
11827 src: __m512h,
11828 k: __mmask32,
11829 a: __m512i,
11830) -> __m512h {
11831 unsafe {
11832 static_assert_rounding!(ROUNDING);
11833 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi16_ph::<ROUNDING>(a), no:src)
11834 }
11835}
11836
11837/// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11838/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11839///
11840/// Rounding is done according to the rounding parameter, which can be one of:
11841///
11842/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11843/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11844/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11845/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11846/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11847///
11848/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi16_ph)
11849#[inline]
11850#[target_feature(enable = "avx512fp16")]
11851#[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
11852#[rustc_legacy_const_generics(2)]
11853#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11854pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
11855 static_assert_rounding!(ROUNDING);
11856 _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a)
11857}
11858
11859/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11860/// and store the results in dst.
11861///
11862/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_ph)
11863#[inline]
11864#[target_feature(enable = "avx512fp16,avx512vl")]
11865#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11866#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11867pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
11868 unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) }
11869}
11870
11871/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11872/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11873/// mask bit is not set).
11874///
11875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_ph)
11876#[inline]
11877#[target_feature(enable = "avx512fp16,avx512vl")]
11878#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11879#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11880pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
11881 unsafe { simd_select_bitmask(m:k, yes:_mm_cvtepu16_ph(a), no:src) }
11882}
11883
11884/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11885/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11886///
11887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_ph)
11888#[inline]
11889#[target_feature(enable = "avx512fp16,avx512vl")]
11890#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11891#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11892pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
11893 _mm_mask_cvtepu16_ph(src:_mm_setzero_ph(), k, a)
11894}
11895
11896/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11897/// and store the results in dst.
11898///
11899/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_ph)
11900#[inline]
11901#[target_feature(enable = "avx512fp16,avx512vl")]
11902#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11903#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11904pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
11905 unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) }
11906}
11907
11908/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11909/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11910/// mask bit is not set).
11911///
11912/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_ph)
11913#[inline]
11914#[target_feature(enable = "avx512fp16,avx512vl")]
11915#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11916#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11917pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
11918 unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepu16_ph(a), no:src) }
11919}
11920
11921/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11922/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11923///
11924/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_ph)
11925#[inline]
11926#[target_feature(enable = "avx512fp16,avx512vl")]
11927#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11928#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11929pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
11930 _mm256_mask_cvtepu16_ph(src:_mm256_setzero_ph(), k, a)
11931}
11932
11933/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11934/// and store the results in dst.
11935///
11936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_ph)
11937#[inline]
11938#[target_feature(enable = "avx512fp16")]
11939#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11940#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11941pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
11942 unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) }
11943}
11944
11945/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11946/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11947/// mask bit is not set).
11948///
11949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_ph)
11950#[inline]
11951#[target_feature(enable = "avx512fp16")]
11952#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11953#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11954pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
11955 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu16_ph(a), no:src) }
11956}
11957
11958/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11959/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11960///
11961/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_ph)
11962#[inline]
11963#[target_feature(enable = "avx512fp16")]
11964#[cfg_attr(test, assert_instr(vcvtuw2ph))]
11965#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11966pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
11967 _mm512_mask_cvtepu16_ph(src:_mm512_setzero_ph(), k, a)
11968}
11969
11970/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11971/// and store the results in dst.
11972///
11973/// Rounding is done according to the rounding parameter, which can be one of:
11974///
11975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
11976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
11977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
11978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
11979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
11980///
11981/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu16_ph)
11982#[inline]
11983#[target_feature(enable = "avx512fp16")]
11984#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
11985#[rustc_legacy_const_generics(1)]
11986#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
11987pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
11988 unsafe {
11989 static_assert_rounding!(ROUNDING);
11990 vcvtuw2ph_512(a.as_u16x32(), ROUNDING)
11991 }
11992}
11993
11994/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
11995/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
11996/// mask bit is not set).
11997///
11998/// Rounding is done according to the rounding parameter, which can be one of:
11999///
12000/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12001/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12002/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12003/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12004/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12005///
12006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu16_ph)
12007#[inline]
12008#[target_feature(enable = "avx512fp16")]
12009#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
12010#[rustc_legacy_const_generics(3)]
12011#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12012pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
12013 src: __m512h,
12014 k: __mmask32,
12015 a: __m512i,
12016) -> __m512h {
12017 unsafe {
12018 static_assert_rounding!(ROUNDING);
12019 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepu16_ph::<ROUNDING>(a), no:src)
12020 }
12021}
12022
12023/// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements,
12024/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12025///
12026/// Rounding is done according to the rounding parameter, which can be one of:
12027///
12028/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12029/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12030/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12031/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12032/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12033///
12034/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu16_ph)
12035#[inline]
12036#[target_feature(enable = "avx512fp16")]
12037#[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
12038#[rustc_legacy_const_generics(2)]
12039#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12040pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
12041 static_assert_rounding!(ROUNDING);
12042 _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(src:_mm512_setzero_ph(), k, a)
12043}
12044
12045/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12046/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12047///
12048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ph)
12049#[inline]
12050#[target_feature(enable = "avx512fp16,avx512vl")]
12051#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12052#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12053pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
12054 _mm_mask_cvtepi32_ph(src:_mm_setzero_ph(), k:0xff, a)
12055}
12056
12057/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12058/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12059/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12060///
12061/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi32_ph)
12062#[inline]
12063#[target_feature(enable = "avx512fp16,avx512vl")]
12064#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12065#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12066pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12067 unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) }
12068}
12069
12070/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12071/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12072/// The upper 64 bits of dst are zeroed out.
12073///
12074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi32_ph)
12075#[inline]
12076#[target_feature(enable = "avx512fp16,avx512vl")]
12077#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12078#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12079pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
12080 _mm_mask_cvtepi32_ph(src:_mm_setzero_ph(), k, a)
12081}
12082
12083/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12084/// and store the results in dst.
12085///
12086/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ph)
12087#[inline]
12088#[target_feature(enable = "avx512fp16,avx512vl")]
12089#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12090#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12091pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
12092 unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) }
12093}
12094
12095/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12096/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12097/// mask bit is not set).
12098///
12099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi32_ph)
12100#[inline]
12101#[target_feature(enable = "avx512fp16,avx512vl")]
12102#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12103#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12104pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12105 unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepi32_ph(a), no:src) }
12106}
12107
12108/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12109/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12110///
12111/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi32_ph)
12112#[inline]
12113#[target_feature(enable = "avx512fp16,avx512vl")]
12114#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12115#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12116pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
12117 _mm256_mask_cvtepi32_ph(src:_mm_setzero_ph(), k, a)
12118}
12119
12120/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12121/// and store the results in dst.
12122///
12123/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ph)
12124#[inline]
12125#[target_feature(enable = "avx512fp16")]
12126#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12127#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12128pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
12129 unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) }
12130}
12131
12132/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12133/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12134/// mask bit is not set).
12135///
12136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ph)
12137#[inline]
12138#[target_feature(enable = "avx512fp16")]
12139#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12140#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12141pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
12142 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepi32_ph(a), no:src) }
12143}
12144
12145/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12146/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12147///
12148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ph)
12149#[inline]
12150#[target_feature(enable = "avx512fp16")]
12151#[cfg_attr(test, assert_instr(vcvtdq2ph))]
12152#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12153pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
12154 _mm512_mask_cvtepi32_ph(src:f16x16::ZERO.as_m256h(), k, a)
12155}
12156
12157/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12158/// and store the results in dst.
12159///
12160/// Rounding is done according to the rounding parameter, which can be one of:
12161///
12162/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12163/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12164/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12165/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12166/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12167///
12168/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ph)
12169#[inline]
12170#[target_feature(enable = "avx512fp16")]
12171#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12172#[rustc_legacy_const_generics(1)]
12173#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12174pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12175 unsafe {
12176 static_assert_rounding!(ROUNDING);
12177 vcvtdq2ph_512(a.as_i32x16(), ROUNDING)
12178 }
12179}
12180
12181/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12182/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12183/// mask bit is not set).
12184///
12185/// Rounding is done according to the rounding parameter, which can be one of:
12186///
12187/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12188/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12189/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12190/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12191/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12192///
12193/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ph)
12194#[inline]
12195#[target_feature(enable = "avx512fp16")]
12196#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12197#[rustc_legacy_const_generics(3)]
12198#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12199pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
12200 src: __m256h,
12201 k: __mmask16,
12202 a: __m512i,
12203) -> __m256h {
12204 unsafe {
12205 static_assert_rounding!(ROUNDING);
12206 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi32_ph::<ROUNDING>(a), no:src)
12207 }
12208}
12209
12210/// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12211/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12212///
12213/// Rounding is done according to the rounding parameter, which can be one of:
12214///
12215/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12216/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12217/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12218/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12219/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12220///
12221/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ph)
12222#[inline]
12223#[target_feature(enable = "avx512fp16")]
12224#[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
12225#[rustc_legacy_const_generics(2)]
12226#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12227pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12228 static_assert_rounding!(ROUNDING);
12229 _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k, a)
12230}
12231
12232/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12233/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12234/// of dst.
12235///
12236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sh)
12237#[inline]
12238#[target_feature(enable = "avx512fp16")]
12239#[cfg_attr(test, assert_instr(vcvtsi2sh))]
12240#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12241pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
12242 unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12243}
12244
12245/// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12246/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12247/// of dst.
12248///
12249/// Rounding is done according to the rounding parameter, which can be one of:
12250///
12251/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12252/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12253/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12254/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12256///
12257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_sh)
12258#[inline]
12259#[target_feature(enable = "avx512fp16")]
12260#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
12261#[rustc_legacy_const_generics(2)]
12262#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12263pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
12264 unsafe {
12265 static_assert_rounding!(ROUNDING);
12266 vcvtsi2sh(a, b, ROUNDING)
12267 }
12268}
12269
12270/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12271/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12272///
12273/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_ph)
12274#[inline]
12275#[target_feature(enable = "avx512fp16,avx512vl")]
12276#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12277#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12278pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
12279 _mm_mask_cvtepu32_ph(src:_mm_setzero_ph(), k:0xff, a)
12280}
12281
12282/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12283/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12284/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12285///
12286/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_ph)
12287#[inline]
12288#[target_feature(enable = "avx512fp16,avx512vl")]
12289#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12290#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12291pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12292 unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) }
12293}
12294
12295/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12296/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12297/// The upper 64 bits of dst are zeroed out.
12298///
12299/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_ph)
12300#[inline]
12301#[target_feature(enable = "avx512fp16,avx512vl")]
12302#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12303#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12304pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
12305 _mm_mask_cvtepu32_ph(src:_mm_setzero_ph(), k, a)
12306}
12307
12308/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12309/// and store the results in dst.
12310///
12311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_ph)
12312#[inline]
12313#[target_feature(enable = "avx512fp16,avx512vl")]
12314#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12315#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12316pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
12317 unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) }
12318}
12319
12320/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12321/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12322/// mask bit is not set).
12323///
12324/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_ph)
12325#[inline]
12326#[target_feature(enable = "avx512fp16,avx512vl")]
12327#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12328#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12329pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12330 unsafe { simd_select_bitmask(m:k, yes:_mm256_cvtepu32_ph(a), no:src) }
12331}
12332
12333/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12334/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12335///
12336/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_ph)
12337#[inline]
12338#[target_feature(enable = "avx512fp16,avx512vl")]
12339#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12340#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12341pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
12342 _mm256_mask_cvtepu32_ph(src:_mm_setzero_ph(), k, a)
12343}
12344
12345/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12346/// and store the results in dst.
12347///
12348/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ph)
12349#[inline]
12350#[target_feature(enable = "avx512fp16")]
12351#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12352#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12353pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
12354 unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) }
12355}
12356
12357/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12358/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12359/// mask bit is not set).
12360///
12361/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ph)
12362#[inline]
12363#[target_feature(enable = "avx512fp16")]
12364#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12365#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12366pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
12367 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu32_ph(a), no:src) }
12368}
12369
12370/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12371/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12372///
12373/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ph)
12374#[inline]
12375#[target_feature(enable = "avx512fp16")]
12376#[cfg_attr(test, assert_instr(vcvtudq2ph))]
12377#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12378pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
12379 _mm512_mask_cvtepu32_ph(src:f16x16::ZERO.as_m256h(), k, a)
12380}
12381
12382/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12383/// and store the results in dst.
12384///
12385/// Rounding is done according to the rounding parameter, which can be one of:
12386///
12387/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12388/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12389/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12390/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12391/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12392///
12393/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ph)
12394#[inline]
12395#[target_feature(enable = "avx512fp16")]
12396#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12397#[rustc_legacy_const_generics(1)]
12398#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12399pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
12400 unsafe {
12401 static_assert_rounding!(ROUNDING);
12402 vcvtudq2ph_512(a.as_u32x16(), ROUNDING)
12403 }
12404}
12405
12406/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12407/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12408/// mask bit is not set).
12409///
12410/// Rounding is done according to the rounding parameter, which can be one of:
12411///
12412/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12413/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12414/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12415/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12416/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12417///
12418/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ph)
12419#[inline]
12420#[target_feature(enable = "avx512fp16")]
12421#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12422#[rustc_legacy_const_generics(3)]
12423#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12424pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
12425 src: __m256h,
12426 k: __mmask16,
12427 a: __m512i,
12428) -> __m256h {
12429 unsafe {
12430 static_assert_rounding!(ROUNDING);
12431 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepu32_ph::<ROUNDING>(a), no:src)
12432 }
12433}
12434
12435/// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements,
12436/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12437///
12438/// Rounding is done according to the rounding parameter, which can be one of:
12439///
12440/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12441/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12442/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12443/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12444/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12445///
12446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ph)
12447#[inline]
12448#[target_feature(enable = "avx512fp16")]
12449#[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
12450#[rustc_legacy_const_generics(2)]
12451#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12452pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
12453 static_assert_rounding!(ROUNDING);
12454 _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k, a)
12455}
12456
12457/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12458/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12459/// of dst.
12460///
12461/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sh)
12462#[inline]
12463#[target_feature(enable = "avx512fp16")]
12464#[cfg_attr(test, assert_instr(vcvtusi2sh))]
12465#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12466pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
12467 unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
12468}
12469
12470/// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the
12471/// result in the lower element of dst, and copy the upper 7 packed elements from a to the upper elements
12472/// of dst.
12473///
12474/// Rounding is done according to the rounding parameter, which can be one of:
12475///
12476/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12477/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12478/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12479/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12480/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12481///
12482/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_sh)
12483#[inline]
12484#[target_feature(enable = "avx512fp16")]
12485#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
12486#[rustc_legacy_const_generics(2)]
12487#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12488pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
12489 unsafe {
12490 static_assert_rounding!(ROUNDING);
12491 vcvtusi2sh(a, b, ROUNDING)
12492 }
12493}
12494
12495/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12496/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12497///
12498/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi64_ph)
12499#[inline]
12500#[target_feature(enable = "avx512fp16,avx512vl")]
12501#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12502#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12503pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
12504 _mm_mask_cvtepi64_ph(src:_mm_setzero_ph(), k:0xff, a)
12505}
12506
12507/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12508/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12509/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12510///
12511/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi64_ph)
12512#[inline]
12513#[target_feature(enable = "avx512fp16,avx512vl")]
12514#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12515#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12516pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12517 unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) }
12518}
12519
12520/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12521/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12522/// The upper 96 bits of dst are zeroed out.
12523///
12524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi64_ph)
12525#[inline]
12526#[target_feature(enable = "avx512fp16,avx512vl")]
12527#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12528#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12529pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
12530 _mm_mask_cvtepi64_ph(src:_mm_setzero_ph(), k, a)
12531}
12532
12533/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12534/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12535///
12536/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi64_ph)
12537#[inline]
12538#[target_feature(enable = "avx512fp16,avx512vl")]
12539#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12540#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12541pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
12542 _mm256_mask_cvtepi64_ph(src:_mm_setzero_ph(), k:0xff, a)
12543}
12544
12545/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12546/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12547/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12548///
12549/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi64_ph)
12550#[inline]
12551#[target_feature(enable = "avx512fp16,avx512vl")]
12552#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12553#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12554pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12555 unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) }
12556}
12557
12558/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12559/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12560/// The upper 64 bits of dst are zeroed out.
12561///
12562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi64_ph)
12563#[inline]
12564#[target_feature(enable = "avx512fp16,avx512vl")]
12565#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12566#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12567pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
12568 _mm256_mask_cvtepi64_ph(src:_mm_setzero_ph(), k, a)
12569}
12570
12571/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12572/// and store the results in dst.
12573///
12574/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_ph)
12575#[inline]
12576#[target_feature(enable = "avx512fp16")]
12577#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12578#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12579pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
12580 unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) }
12581}
12582
12583/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12584/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12585/// mask bit is not set).
12586///
12587/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_ph)
12588#[inline]
12589#[target_feature(enable = "avx512fp16")]
12590#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12591#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12592pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
12593 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepi64_ph(a), no:src) }
12594}
12595
12596/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12597/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12598///
12599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_ph)
12600#[inline]
12601#[target_feature(enable = "avx512fp16")]
12602#[cfg_attr(test, assert_instr(vcvtqq2ph))]
12603#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12604pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
12605 _mm512_mask_cvtepi64_ph(src:f16x8::ZERO.as_m128h(), k, a)
12606}
12607
12608/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12609/// and store the results in dst.
12610///
12611/// Rounding is done according to the rounding parameter, which can be one of:
12612///
12613/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12614/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12615/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12616/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12617/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12618///
12619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi64_ph)
12620#[inline]
12621#[target_feature(enable = "avx512fp16")]
12622#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12623#[rustc_legacy_const_generics(1)]
12624#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12625pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
12626 unsafe {
12627 static_assert_rounding!(ROUNDING);
12628 vcvtqq2ph_512(a.as_i64x8(), ROUNDING)
12629 }
12630}
12631
12632/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12633/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12634/// mask bit is not set).
12635///
12636/// Rounding is done according to the rounding parameter, which can be one of:
12637///
12638/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12639/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12640/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12641/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12642/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12643///
12644/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi64_ph)
12645#[inline]
12646#[target_feature(enable = "avx512fp16")]
12647#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12648#[rustc_legacy_const_generics(3)]
12649#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12650pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
12651 src: __m128h,
12652 k: __mmask8,
12653 a: __m512i,
12654) -> __m128h {
12655 unsafe {
12656 static_assert_rounding!(ROUNDING);
12657 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepi64_ph::<ROUNDING>(a), no:src)
12658 }
12659}
12660
12661/// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12662/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12663///
12664/// Rounding is done according to the rounding parameter, which can be one of:
12665///
12666/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12667/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12668/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12669/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12670/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12671///
12672/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi64_ph)
12673#[inline]
12674#[target_feature(enable = "avx512fp16")]
12675#[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
12676#[rustc_legacy_const_generics(2)]
12677#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12678pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12679 static_assert_rounding!(ROUNDING);
12680 _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a)
12681}
12682
12683/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12684/// and store the results in dst. The upper 96 bits of dst are zeroed out.
12685///
12686/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu64_ph)
12687#[inline]
12688#[target_feature(enable = "avx512fp16,avx512vl")]
12689#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12690#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12691pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
12692 _mm_mask_cvtepu64_ph(src:_mm_setzero_ph(), k:0xff, a)
12693}
12694
12695/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12696/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12697/// mask bit is not set). The upper 96 bits of dst are zeroed out.
12698///
12699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu64_ph)
12700#[inline]
12701#[target_feature(enable = "avx512fp16,avx512vl")]
12702#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12703#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12704pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
12705 unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) }
12706}
12707
12708/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12709/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12710/// The upper 96 bits of dst are zeroed out.
12711///
12712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu64_ph)
12713#[inline]
12714#[target_feature(enable = "avx512fp16,avx512vl")]
12715#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12716#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12717pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
12718 _mm_mask_cvtepu64_ph(src:_mm_setzero_ph(), k, a)
12719}
12720
12721/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12722/// and store the results in dst. The upper 64 bits of dst are zeroed out.
12723///
12724/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu64_ph)
12725#[inline]
12726#[target_feature(enable = "avx512fp16,avx512vl")]
12727#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12728#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12729pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
12730 _mm256_mask_cvtepu64_ph(src:_mm_setzero_ph(), k:0xff, a)
12731}
12732
12733/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12734/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12735/// mask bit is not set). The upper 64 bits of dst are zeroed out.
12736///
12737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu64_ph)
12738#[inline]
12739#[target_feature(enable = "avx512fp16,avx512vl")]
12740#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12741#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12742pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
12743 unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) }
12744}
12745
12746/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12747/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12748/// The upper 64 bits of dst are zeroed out.
12749///
12750/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu64_ph)
12751#[inline]
12752#[target_feature(enable = "avx512fp16,avx512vl")]
12753#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12754#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12755pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
12756 _mm256_mask_cvtepu64_ph(src:_mm_setzero_ph(), k, a)
12757}
12758
12759/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12760/// and store the results in dst.
12761///
12762/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu64_ph)
12763#[inline]
12764#[target_feature(enable = "avx512fp16")]
12765#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12766#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12767pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
12768 unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) }
12769}
12770
12771/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12772/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12773/// mask bit is not set).
12774///
12775/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu64_ph)
12776#[inline]
12777#[target_feature(enable = "avx512fp16")]
12778#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12779#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12780pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
12781 unsafe { simd_select_bitmask(m:k, yes:_mm512_cvtepu64_ph(a), no:src) }
12782}
12783
12784/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12785/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12786///
12787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu64_ph)
12788#[inline]
12789#[target_feature(enable = "avx512fp16")]
12790#[cfg_attr(test, assert_instr(vcvtuqq2ph))]
12791#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12792pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
12793 _mm512_mask_cvtepu64_ph(src:f16x8::ZERO.as_m128h(), k, a)
12794}
12795
/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
/// and store the results in dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu64_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
    unsafe {
        // Reject unsupported ROUNDING values at compile time.
        static_assert_rounding!(ROUNDING);
        vcvtuqq2ph_512(a.as_u64x8(), ROUNDING)
    }
}
12819
12820/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12821/// and store the results in dst using writemask k (elements are copied from src to dst when the corresponding
12822/// mask bit is not set).
12823///
12824/// Rounding is done according to the rounding parameter, which can be one of:
12825///
12826/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12827/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12828/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12829/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12830/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12831///
12832/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu64_ph)
12833#[inline]
12834#[target_feature(enable = "avx512fp16")]
12835#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12836#[rustc_legacy_const_generics(3)]
12837#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12838pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
12839 src: __m128h,
12840 k: __mmask8,
12841 a: __m512i,
12842) -> __m128h {
12843 unsafe {
12844 static_assert_rounding!(ROUNDING);
12845 simd_select_bitmask(m:k, yes:_mm512_cvt_roundepu64_ph::<ROUNDING>(a), no:src)
12846 }
12847}
12848
12849/// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements,
12850/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12851///
12852/// Rounding is done according to the rounding parameter, which can be one of:
12853///
12854/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12855/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12856/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12857/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12858/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12859///
12860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu64_ph)
12861#[inline]
12862#[target_feature(enable = "avx512fp16")]
12863#[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
12864#[rustc_legacy_const_generics(2)]
12865#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12866pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
12867 static_assert_rounding!(ROUNDING);
12868 _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a)
12869}
12870
12871/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12872/// floating-point elements, and store the results in dst.
12873///
12874/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxps_ph)
12875#[inline]
12876#[target_feature(enable = "avx512fp16,avx512vl")]
12877#[cfg_attr(test, assert_instr(vcvtps2phx))]
12878#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12879pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
12880 _mm_mask_cvtxps_ph(src:_mm_setzero_ph(), k:0xff, a)
12881}
12882
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxps_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2phx))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
    // The intrinsic performs the masked merge itself: `src` supplies inactive lanes.
    unsafe { vcvtps2phx_128(a, src, k) }
}
12895
12896/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12897/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12898/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
12899///
12900/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxps_ph)
12901#[inline]
12902#[target_feature(enable = "avx512fp16,avx512vl")]
12903#[cfg_attr(test, assert_instr(vcvtps2phx))]
12904#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12905pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
12906 _mm_mask_cvtxps_ph(src:_mm_setzero_ph(), k, a)
12907}
12908
12909/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12910/// floating-point elements, and store the results in dst.
12911///
12912/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxps_ph)
12913#[inline]
12914#[target_feature(enable = "avx512fp16,avx512vl")]
12915#[cfg_attr(test, assert_instr(vcvtps2phx))]
12916#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12917pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
12918 _mm256_mask_cvtxps_ph(src:_mm_setzero_ph(), k:0xff, a)
12919}
12920
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxps_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2phx))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
    // The intrinsic performs the masked merge itself: `src` supplies inactive lanes.
    unsafe { vcvtps2phx_256(a, src, k) }
}
12933
12934/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12935/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12936/// corresponding mask bit is not set).
12937///
12938/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxps_ph)
12939#[inline]
12940#[target_feature(enable = "avx512fp16,avx512vl")]
12941#[cfg_attr(test, assert_instr(vcvtps2phx))]
12942#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12943pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
12944 _mm256_mask_cvtxps_ph(src:_mm_setzero_ph(), k, a)
12945}
12946
12947/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12948/// floating-point elements, and store the results in dst.
12949///
12950/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxps_ph)
12951#[inline]
12952#[target_feature(enable = "avx512fp16")]
12953#[cfg_attr(test, assert_instr(vcvtps2phx))]
12954#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12955pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
12956 _mm512_mask_cvtxps_ph(src:f16x16::ZERO.as_m256h(), k:0xffff, a)
12957}
12958
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtps2phx))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
    // Rounding follows the current MXCSR rounding mode (_MM_FROUND_CUR_DIRECTION).
    unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
}
12971
12972/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12973/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
12974/// corresponding mask bit is not set).
12975///
12976/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxps_ph)
12977#[inline]
12978#[target_feature(enable = "avx512fp16")]
12979#[cfg_attr(test, assert_instr(vcvtps2phx))]
12980#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
12981pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
12982 _mm512_mask_cvtxps_ph(src:f16x16::ZERO.as_m256h(), k, a)
12983}
12984
12985/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
12986/// floating-point elements, and store the results in dst.
12987///
12988/// Rounding is done according to the rounding parameter, which can be one of:
12989///
12990/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
12991/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
12992/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
12993/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
12994/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
12995///
12996/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundps_ph)
12997#[inline]
12998#[target_feature(enable = "avx512fp16")]
12999#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
13000#[rustc_legacy_const_generics(1)]
13001#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13002pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
13003 static_assert_rounding!(ROUNDING);
13004 _mm512_mask_cvtx_roundps_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k:0xffff, a)
13005}
13006
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundps_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
    src: __m256h,
    k: __mmask16,
    a: __m512,
) -> __m256h {
    unsafe {
        // Reject unsupported ROUNDING values at compile time.
        static_assert_rounding!(ROUNDING);
        vcvtps2phx_512(a, src, k, ROUNDING)
    }
}
13035
13036/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit)
13037/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13038/// corresponding mask bit is not set).
13039///
13040/// Rounding is done according to the rounding parameter, which can be one of:
13041///
13042/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13043/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13044/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13045/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13046/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13047///
13048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundps_ph)
13049#[inline]
13050#[target_feature(enable = "avx512fp16")]
13051#[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
13052#[rustc_legacy_const_generics(2)]
13053#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13054pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
13055 static_assert_rounding!(ROUNDING);
13056 _mm512_mask_cvtx_roundps_ph::<ROUNDING>(src:f16x16::ZERO.as_m256h(), k, a)
13057}
13058
13059/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13060/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13061/// elements from a to the upper elements of dst.
13062///
13063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sh)
13064#[inline]
13065#[target_feature(enable = "avx512fp16")]
13066#[cfg_attr(test, assert_instr(vcvtss2sh))]
13067#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13068pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
13069 _mm_mask_cvtss_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
13070}
13071
/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtss2sh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
    // Rounding follows the current MXCSR rounding mode (_MM_FROUND_CUR_DIRECTION).
    unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}
13085
13086/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13087/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13088/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13089/// elements of dst.
13090///
13091/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sh)
13092#[inline]
13093#[target_feature(enable = "avx512fp16")]
13094#[cfg_attr(test, assert_instr(vcvtss2sh))]
13095#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13096pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
13097 _mm_mask_cvtss_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
13098}
13099
13100/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13101/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13102/// elements from a to the upper elements of dst.
13103///
13104/// Rounding is done according to the rounding parameter, which can be one of:
13105///
13106/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13107/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13108/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13109/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13110/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13111///
13112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sh)
13113#[inline]
13114#[target_feature(enable = "avx512fp16")]
13115#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13116#[rustc_legacy_const_generics(2)]
13117#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13118pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
13119 static_assert_rounding!(ROUNDING);
13120 _mm_mask_cvt_roundss_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
13121}
13122
/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point element, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128,
) -> __m128h {
    unsafe {
        // Reject unsupported ROUNDING values at compile time.
        static_assert_rounding!(ROUNDING);
        vcvtss2sh(a, b, src, k, ROUNDING)
    }
}
13153
13154/// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit)
13155/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13156/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13157/// elements of dst.
13158///
13159/// Rounding is done according to the rounding parameter, which can be one of:
13160///
13161/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13162/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13163/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13164/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13165/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13166///
13167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sh)
13168#[inline]
13169#[target_feature(enable = "avx512fp16")]
13170#[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
13171#[rustc_legacy_const_generics(3)]
13172#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13173pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
13174 k: __mmask8,
13175 a: __m128h,
13176 b: __m128,
13177) -> __m128h {
13178 static_assert_rounding!(ROUNDING);
13179 _mm_mask_cvt_roundss_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
13180}
13181
13182/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13183/// floating-point elements, and store the results in dst. The upper 96 bits of dst are zeroed out.
13184///
13185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ph)
13186#[inline]
13187#[target_feature(enable = "avx512fp16,avx512vl")]
13188#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13189#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13190pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
13191 _mm_mask_cvtpd_ph(src:_mm_setzero_ph(), k:0xff, a)
13192}
13193
/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
    // The intrinsic performs the masked merge itself: `src` supplies inactive lanes.
    unsafe { vcvtpd2ph_128(a, src, k) }
}
13206
13207/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13208/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13209/// corresponding mask bit is not set). The upper 96 bits of dst are zeroed out.
13210///
13211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ph)
13212#[inline]
13213#[target_feature(enable = "avx512fp16,avx512vl")]
13214#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13215#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13216pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
13217 _mm_mask_cvtpd_ph(src:_mm_setzero_ph(), k, a)
13218}
13219
13220/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13221/// floating-point elements, and store the results in dst. The upper 64 bits of dst are zeroed out.
13222///
13223/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ph)
13224#[inline]
13225#[target_feature(enable = "avx512fp16,avx512vl")]
13226#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13227#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13228pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
13229 _mm256_mask_cvtpd_ph(src:_mm_setzero_ph(), k:0xff, a)
13230}
13231
/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
    // The intrinsic performs the masked merge itself: `src` supplies inactive lanes.
    unsafe { vcvtpd2ph_256(a, src, k) }
}
13244
13245/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13246/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13247/// corresponding mask bit is not set). The upper 64 bits of dst are zeroed out.
13248///
13249/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ph)
13250#[inline]
13251#[target_feature(enable = "avx512fp16,avx512vl")]
13252#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13253#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13254pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
13255 _mm256_mask_cvtpd_ph(src:_mm_setzero_ph(), k, a)
13256}
13257
13258/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13259/// floating-point elements, and store the results in dst.
13260///
13261/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ph)
13262#[inline]
13263#[target_feature(enable = "avx512fp16")]
13264#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13265#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13266pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
13267 _mm512_mask_cvtpd_ph(src:f16x8::ZERO.as_m128h(), k:0xff, a)
13268}
13269
/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtpd2ph))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
    // Rounding follows the current MXCSR rounding mode (_MM_FROUND_CUR_DIRECTION).
    unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
}
13282
13283/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13284/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13285/// corresponding mask bit is not set).
13286///
13287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ph)
13288#[inline]
13289#[target_feature(enable = "avx512fp16")]
13290#[cfg_attr(test, assert_instr(vcvtpd2ph))]
13291#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13292pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
13293 _mm512_mask_cvtpd_ph(src:f16x8::ZERO.as_m128h(), k, a)
13294}
13295
13296/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13297/// floating-point elements, and store the results in dst.
13298///
13299/// Rounding is done according to the rounding parameter, which can be one of:
13300///
13301/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13302/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13303/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13304/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13305/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13306///
13307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ph)
13308#[inline]
13309#[target_feature(enable = "avx512fp16")]
13310#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13311#[rustc_legacy_const_generics(1)]
13312#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13313pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
13314 static_assert_rounding!(ROUNDING);
13315 _mm512_mask_cvt_roundpd_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a)
13316}
13317
/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to dst
/// when the corresponding mask bit is not set).
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ph)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m512d,
) -> __m128h {
    unsafe {
        // Reject unsupported ROUNDING values at compile time.
        static_assert_rounding!(ROUNDING);
        vcvtpd2ph_512(a, src, k, ROUNDING)
    }
}
13346
13347/// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit)
13348/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
13349/// corresponding mask bit is not set).
13350///
13351/// Rounding is done according to the rounding parameter, which can be one of:
13352///
13353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13358///
13359/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ph)
13360#[inline]
13361#[target_feature(enable = "avx512fp16")]
13362#[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
13363#[rustc_legacy_const_generics(2)]
13364#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13365pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
13366 static_assert_rounding!(ROUNDING);
13367 _mm512_mask_cvt_roundpd_ph::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a)
13368}
13369
13370/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13371/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13372/// elements from a to the upper elements of dst.
13373///
13374/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_sh)
13375#[inline]
13376#[target_feature(enable = "avx512fp16")]
13377#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13378#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13379pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
13380 _mm_mask_cvtsd_sh(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
13381}
13382
/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point elements, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsd2sh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
    // Rounding follows MXCSR.RC (current direction); only mask bit 0 is consulted.
    unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}
13396
13397/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13398/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13399/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13400/// elements of dst.
13401///
13402/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_sh)
13403#[inline]
13404#[target_feature(enable = "avx512fp16")]
13405#[cfg_attr(test, assert_instr(vcvtsd2sh))]
13406#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13407pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
13408 _mm_mask_cvtsd_sh(src:f16x8::ZERO.as_m128h(), k, a, b)
13409}
13410
13411/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13412/// floating-point elements, store the result in the lower element of dst, and copy the upper 7 packed
13413/// elements from a to the upper elements of dst.
13414///
13415/// Rounding is done according to the rounding parameter, which can be one of:
13416///
13417/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13418/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13419/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13420/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13421/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13422///
13423/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_sh)
13424#[inline]
13425#[target_feature(enable = "avx512fp16")]
13426#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13427#[rustc_legacy_const_generics(2)]
13428#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13429pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
13430 static_assert_rounding!(ROUNDING);
13431 _mm_mask_cvt_roundsd_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k:0xff, a, b)
13432}
13433
/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
/// floating-point elements, store the result in the lower element of dst using writemask k (the element
/// is copied from src when mask bit 0 is not set), and copy the upper 7 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_sh)
#[inline]
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
    src: __m128h,
    k: __mmask8,
    a: __m128h,
    b: __m128d,
) -> __m128h {
    unsafe {
        // Reject invalid rounding-mode constants at compile time.
        static_assert_rounding!(ROUNDING);
        vcvtsd2sh(a, b, src, k, ROUNDING)
    }
}
13464
13465/// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit)
13466/// floating-point elements, store the result in the lower element of dst using zeromask k (the element
13467/// is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from a to the upper
13468/// elements of dst.
13469///
13470/// Rounding is done according to the rounding parameter, which can be one of:
13471///
13472/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13473/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13474/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13475/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13476/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13477///
13478/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_sh)
13479#[inline]
13480#[target_feature(enable = "avx512fp16")]
13481#[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
13482#[rustc_legacy_const_generics(3)]
13483#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13484pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
13485 k: __mmask8,
13486 a: __m128h,
13487 b: __m128d,
13488) -> __m128h {
13489 static_assert_rounding!(ROUNDING);
13490 _mm_mask_cvt_roundsd_sh::<ROUNDING>(src:f16x8::ZERO.as_m128h(), k, a, b)
13491}
13492
13493/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13494/// store the results in dst.
13495///
13496/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
13497#[inline]
13498#[target_feature(enable = "avx512fp16,avx512vl")]
13499#[cfg_attr(test, assert_instr(vcvtph2w))]
13500#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13501pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
13502 _mm_mask_cvtph_epi16(src:_mm_undefined_si128(), k:0xff, a)
13503}
13504
13505/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13506/// store the results in dst using writemask k (elements are copied from src when the corresponding
13507/// mask bit is not set).
13508///
13509/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
13510#[inline]
13511#[target_feature(enable = "avx512fp16,avx512vl")]
13512#[cfg_attr(test, assert_instr(vcvtph2w))]
13513#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13514pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13515 unsafe { transmute(src:vcvtph2w_128(a, src.as_i16x8(), k)) }
13516}
13517
13518/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13519/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13520///
13521/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
13522#[inline]
13523#[target_feature(enable = "avx512fp16,avx512vl")]
13524#[cfg_attr(test, assert_instr(vcvtph2w))]
13525#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13526pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
13527 _mm_mask_cvtph_epi16(src:_mm_setzero_si128(), k, a)
13528}
13529
13530/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13531/// store the results in dst.
13532///
13533/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
13534#[inline]
13535#[target_feature(enable = "avx512fp16,avx512vl")]
13536#[cfg_attr(test, assert_instr(vcvtph2w))]
13537#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13538pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
13539 _mm256_mask_cvtph_epi16(src:_mm256_undefined_si256(), k:0xffff, a)
13540}
13541
13542/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13543/// store the results in dst using writemask k (elements are copied from src when the corresponding
13544/// mask bit is not set).
13545///
13546/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
13547#[inline]
13548#[target_feature(enable = "avx512fp16,avx512vl")]
13549#[cfg_attr(test, assert_instr(vcvtph2w))]
13550#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13551pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
13552 unsafe { transmute(src:vcvtph2w_256(a, src.as_i16x16(), k)) }
13553}
13554
13555/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13556/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13557///
13558/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
13559#[inline]
13560#[target_feature(enable = "avx512fp16,avx512vl")]
13561#[cfg_attr(test, assert_instr(vcvtph2w))]
13562#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13563pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
13564 _mm256_mask_cvtph_epi16(src:_mm256_setzero_si256(), k, a)
13565}
13566
13567/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13568/// store the results in dst.
13569///
13570/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
13571#[inline]
13572#[target_feature(enable = "avx512fp16")]
13573#[cfg_attr(test, assert_instr(vcvtph2w))]
13574#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13575pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
13576 _mm512_mask_cvtph_epi16(src:_mm512_undefined_epi32(), k:0xffffffff, a)
13577}
13578
13579/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13580/// store the results in dst using writemask k (elements are copied from src when the corresponding
13581/// mask bit is not set).
13582///
13583/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
13584#[inline]
13585#[target_feature(enable = "avx512fp16")]
13586#[cfg_attr(test, assert_instr(vcvtph2w))]
13587#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13588pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13589 unsafe {
13590 transmute(src:vcvtph2w_512(
13591 a,
13592 src.as_i16x32(),
13593 k,
13594 _MM_FROUND_CUR_DIRECTION,
13595 ))
13596 }
13597}
13598
13599/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13600/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13601///
13602/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
13603#[inline]
13604#[target_feature(enable = "avx512fp16")]
13605#[cfg_attr(test, assert_instr(vcvtph2w))]
13606#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13607pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
13608 _mm512_mask_cvtph_epi16(src:_mm512_setzero_si512(), k, a)
13609}
13610
13611/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13612/// store the results in dst.
13613///
13614/// Rounding is done according to the rounding parameter, which can be one of:
13615///
13616/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13617/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13618/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13619/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13620/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13621///
13622/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
13623#[inline]
13624#[target_feature(enable = "avx512fp16")]
13625#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13626#[rustc_legacy_const_generics(1)]
13627#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13628pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
13629 static_assert_rounding!(ROUNDING);
13630 _mm512_mask_cvt_roundph_epi16::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xffffffff, a)
13631}
13632
13633/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13634/// store the results in dst using writemask k (elements are copied from src when the corresponding
13635/// mask bit is not set).
13636///
13637/// Rounding is done according to the rounding parameter, which can be one of:
13638///
13639/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13640/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13641/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13642/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13643/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13644///
13645/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
13646#[inline]
13647#[target_feature(enable = "avx512fp16")]
13648#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13649#[rustc_legacy_const_generics(3)]
13650#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13651pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
13652 src: __m512i,
13653 k: __mmask32,
13654 a: __m512h,
13655) -> __m512i {
13656 unsafe {
13657 static_assert_rounding!(ROUNDING);
13658 transmute(src:vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
13659 }
13660}
13661
13662/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
13663/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13664///
13665/// Rounding is done according to the rounding parameter, which can be one of:
13666///
13667/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13668/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13669/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13670/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13671/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13672///
13673/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
13674#[inline]
13675#[target_feature(enable = "avx512fp16")]
13676#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
13677#[rustc_legacy_const_generics(2)]
13678#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13679pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
13680 static_assert_rounding!(ROUNDING);
13681 _mm512_mask_cvt_roundph_epi16::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
13682}
13683
13684/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13685/// and store the results in dst.
13686///
13687/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
13688#[inline]
13689#[target_feature(enable = "avx512fp16,avx512vl")]
13690#[cfg_attr(test, assert_instr(vcvtph2uw))]
13691#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13692pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
13693 _mm_mask_cvtph_epu16(src:_mm_undefined_si128(), k:0xff, a)
13694}
13695
13696/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13697/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13698/// mask bit is not set).
13699///
13700/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
13701#[inline]
13702#[target_feature(enable = "avx512fp16,avx512vl")]
13703#[cfg_attr(test, assert_instr(vcvtph2uw))]
13704#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13705pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13706 unsafe { transmute(src:vcvtph2uw_128(a, src.as_u16x8(), k)) }
13707}
13708
13709/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13710/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13711///
13712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
13713#[inline]
13714#[target_feature(enable = "avx512fp16,avx512vl")]
13715#[cfg_attr(test, assert_instr(vcvtph2uw))]
13716#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13717pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
13718 _mm_mask_cvtph_epu16(src:_mm_setzero_si128(), k, a)
13719}
13720
13721/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13722/// and store the results in dst.
13723///
13724/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
13725#[inline]
13726#[target_feature(enable = "avx512fp16,avx512vl")]
13727#[cfg_attr(test, assert_instr(vcvtph2uw))]
13728#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13729pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
13730 _mm256_mask_cvtph_epu16(src:_mm256_undefined_si256(), k:0xffff, a)
13731}
13732
13733/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13734/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13735/// mask bit is not set).
13736///
13737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
13738#[inline]
13739#[target_feature(enable = "avx512fp16,avx512vl")]
13740#[cfg_attr(test, assert_instr(vcvtph2uw))]
13741#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13742pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
13743 unsafe { transmute(src:vcvtph2uw_256(a, src.as_u16x16(), k)) }
13744}
13745
13746/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13747/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13748///
13749/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
13750#[inline]
13751#[target_feature(enable = "avx512fp16,avx512vl")]
13752#[cfg_attr(test, assert_instr(vcvtph2uw))]
13753#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13754pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
13755 _mm256_mask_cvtph_epu16(src:_mm256_setzero_si256(), k, a)
13756}
13757
13758/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13759/// and store the results in dst.
13760///
13761/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
13762#[inline]
13763#[target_feature(enable = "avx512fp16")]
13764#[cfg_attr(test, assert_instr(vcvtph2uw))]
13765#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13766pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
13767 _mm512_mask_cvtph_epu16(src:_mm512_undefined_epi32(), k:0xffffffff, a)
13768}
13769
13770/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13771/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13772/// mask bit is not set).
13773///
13774/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
13775#[inline]
13776#[target_feature(enable = "avx512fp16")]
13777#[cfg_attr(test, assert_instr(vcvtph2uw))]
13778#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13779pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13780 unsafe {
13781 transmute(src:vcvtph2uw_512(
13782 a,
13783 src.as_u16x32(),
13784 k,
13785 _MM_FROUND_CUR_DIRECTION,
13786 ))
13787 }
13788}
13789
13790/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13791/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13792///
13793/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
13794#[inline]
13795#[target_feature(enable = "avx512fp16")]
13796#[cfg_attr(test, assert_instr(vcvtph2uw))]
13797#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13798pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
13799 _mm512_mask_cvtph_epu16(src:_mm512_setzero_si512(), k, a)
13800}
13801
13802/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13803/// and store the results in dst.
13804///
13805/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13806///
13807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
13808#[inline]
13809#[target_feature(enable = "avx512fp16")]
13810#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
13811#[rustc_legacy_const_generics(1)]
13812#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13813pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
13814 static_assert_sae!(SAE);
13815 _mm512_mask_cvt_roundph_epu16::<SAE>(src:_mm512_undefined_epi32(), k:0xffffffff, a)
13816}
13817
13818/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13819/// and store the results in dst using writemask k (elements are copied from src when the corresponding
13820/// mask bit is not set).
13821///
13822/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13823///
13824/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
13825#[inline]
13826#[target_feature(enable = "avx512fp16")]
13827#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
13828#[rustc_legacy_const_generics(3)]
13829#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13830pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
13831 src: __m512i,
13832 k: __mmask32,
13833 a: __m512h,
13834) -> __m512i {
13835 unsafe {
13836 static_assert_sae!(SAE);
13837 transmute(src:vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
13838 }
13839}
13840
13841/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
13842/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13843///
13844/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
13845///
13846/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
13847#[inline]
13848#[target_feature(enable = "avx512fp16")]
13849#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
13850#[rustc_legacy_const_generics(2)]
13851#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13852pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
13853 static_assert_sae!(SAE);
13854 _mm512_mask_cvt_roundph_epu16::<SAE>(src:_mm512_setzero_si512(), k, a)
13855}
13856
13857/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13858/// truncation, and store the results in dst.
13859///
13860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
13861#[inline]
13862#[target_feature(enable = "avx512fp16,avx512vl")]
13863#[cfg_attr(test, assert_instr(vcvttph2w))]
13864#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13865pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
13866 _mm_mask_cvttph_epi16(src:_mm_undefined_si128(), k:0xff, a)
13867}
13868
13869/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13870/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13871/// mask bit is not set).
13872///
13873/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
13874#[inline]
13875#[target_feature(enable = "avx512fp16,avx512vl")]
13876#[cfg_attr(test, assert_instr(vcvttph2w))]
13877#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13878pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
13879 unsafe { transmute(src:vcvttph2w_128(a, src.as_i16x8(), k)) }
13880}
13881
13882/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13883/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13884/// mask bit is not set).
13885///
13886/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
13887#[inline]
13888#[target_feature(enable = "avx512fp16,avx512vl")]
13889#[cfg_attr(test, assert_instr(vcvttph2w))]
13890#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13891pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
13892 _mm_mask_cvttph_epi16(src:_mm_setzero_si128(), k, a)
13893}
13894
13895/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13896/// truncation, and store the results in dst.
13897///
13898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
13899#[inline]
13900#[target_feature(enable = "avx512fp16,avx512vl")]
13901#[cfg_attr(test, assert_instr(vcvttph2w))]
13902#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13903pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
13904 _mm256_mask_cvttph_epi16(src:_mm256_undefined_si256(), k:0xffff, a)
13905}
13906
13907/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13908/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13909/// mask bit is not set).
13910///
13911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
13912#[inline]
13913#[target_feature(enable = "avx512fp16,avx512vl")]
13914#[cfg_attr(test, assert_instr(vcvttph2w))]
13915#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13916pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
13917 unsafe { transmute(src:vcvttph2w_256(a, src.as_i16x16(), k)) }
13918}
13919
13920/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13921/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13922/// mask bit is not set).
13923///
13924/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
13925#[inline]
13926#[target_feature(enable = "avx512fp16,avx512vl")]
13927#[cfg_attr(test, assert_instr(vcvttph2w))]
13928#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13929pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
13930 _mm256_mask_cvttph_epi16(src:_mm256_setzero_si256(), k, a)
13931}
13932
13933/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13934/// truncation, and store the results in dst.
13935///
13936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
13937#[inline]
13938#[target_feature(enable = "avx512fp16")]
13939#[cfg_attr(test, assert_instr(vcvttph2w))]
13940#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13941pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
13942 _mm512_mask_cvttph_epi16(src:_mm512_undefined_epi32(), k:0xffffffff, a)
13943}
13944
13945/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13946/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13947/// mask bit is not set).
13948///
13949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
13950#[inline]
13951#[target_feature(enable = "avx512fp16")]
13952#[cfg_attr(test, assert_instr(vcvttph2w))]
13953#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13954pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
13955 unsafe {
13956 transmute(src:vcvttph2w_512(
13957 a,
13958 src.as_i16x32(),
13959 k,
13960 _MM_FROUND_CUR_DIRECTION,
13961 ))
13962 }
13963}
13964
13965/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13966/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
13967/// mask bit is not set).
13968///
13969/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
13970#[inline]
13971#[target_feature(enable = "avx512fp16")]
13972#[cfg_attr(test, assert_instr(vcvttph2w))]
13973#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13974pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
13975 _mm512_mask_cvttph_epi16(src:_mm512_setzero_si512(), k, a)
13976}
13977
13978/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13979/// truncation, and store the results in dst.
13980///
13981/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13982///
13983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
13984#[inline]
13985#[target_feature(enable = "avx512fp16")]
13986#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
13987#[rustc_legacy_const_generics(1)]
13988#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
13989pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
13990 static_assert_sae!(SAE);
13991 _mm512_mask_cvtt_roundph_epi16::<SAE>(src:_mm512_undefined_epi32(), k:0xffffffff, a)
13992}
13993
13994/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
13995/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
13996/// mask bit is not set).
13997///
13998/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13999///
14000/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
14001#[inline]
14002#[target_feature(enable = "avx512fp16")]
14003#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
14004#[rustc_legacy_const_generics(3)]
14005#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14006pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
14007 src: __m512i,
14008 k: __mmask32,
14009 a: __m512h,
14010) -> __m512i {
14011 unsafe {
14012 static_assert_sae!(SAE);
14013 transmute(src:vcvttph2w_512(a, src.as_i16x32(), k, SAE))
14014 }
14015}
14016
14017/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
14018/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14019/// mask bit is not set).
14020///
14021/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14022///
14023/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
14024#[inline]
14025#[target_feature(enable = "avx512fp16")]
14026#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
14027#[rustc_legacy_const_generics(2)]
14028#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14029pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14030 static_assert_sae!(SAE);
14031 _mm512_mask_cvtt_roundph_epi16::<SAE>(src:_mm512_setzero_si512(), k, a)
14032}
14033
14034/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14035/// truncation, and store the results in dst.
14036///
14037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
14038#[inline]
14039#[target_feature(enable = "avx512fp16,avx512vl")]
14040#[cfg_attr(test, assert_instr(vcvttph2uw))]
14041#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14042pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
14043 _mm_mask_cvttph_epu16(src:_mm_undefined_si128(), k:0xff, a)
14044}
14045
14046/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14047/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14048/// mask bit is not set).
14049///
14050/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
14051#[inline]
14052#[target_feature(enable = "avx512fp16,avx512vl")]
14053#[cfg_attr(test, assert_instr(vcvttph2uw))]
14054#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14055pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14056 unsafe { transmute(src:vcvttph2uw_128(a, src.as_u16x8(), k)) }
14057}
14058
14059/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14060/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14061/// mask bit is not set).
14062///
14063/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
14064#[inline]
14065#[target_feature(enable = "avx512fp16,avx512vl")]
14066#[cfg_attr(test, assert_instr(vcvttph2uw))]
14067#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14068pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
14069 _mm_mask_cvttph_epu16(src:_mm_setzero_si128(), k, a)
14070}
14071
14072/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14073/// truncation, and store the results in dst.
14074///
14075/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
14076#[inline]
14077#[target_feature(enable = "avx512fp16,avx512vl")]
14078#[cfg_attr(test, assert_instr(vcvttph2uw))]
14079#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14080pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
14081 _mm256_mask_cvttph_epu16(src:_mm256_undefined_si256(), k:0xffff, a)
14082}
14083
14084/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14085/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14086/// mask bit is not set).
14087///
14088/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
14089#[inline]
14090#[target_feature(enable = "avx512fp16,avx512vl")]
14091#[cfg_attr(test, assert_instr(vcvttph2uw))]
14092#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14093pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
14094 unsafe { transmute(src:vcvttph2uw_256(a, src.as_u16x16(), k)) }
14095}
14096
14097/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14098/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14099/// mask bit is not set).
14100///
14101/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
14102#[inline]
14103#[target_feature(enable = "avx512fp16,avx512vl")]
14104#[cfg_attr(test, assert_instr(vcvttph2uw))]
14105#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14106pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
14107 _mm256_mask_cvttph_epu16(src:_mm256_setzero_si256(), k, a)
14108}
14109
14110/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14111/// truncation, and store the results in dst.
14112///
14113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
14114#[inline]
14115#[target_feature(enable = "avx512fp16")]
14116#[cfg_attr(test, assert_instr(vcvttph2uw))]
14117#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14118pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
14119 _mm512_mask_cvttph_epu16(src:_mm512_undefined_epi32(), k:0xffffffff, a)
14120}
14121
14122/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14123/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14124/// mask bit is not set).
14125///
14126/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
14127#[inline]
14128#[target_feature(enable = "avx512fp16")]
14129#[cfg_attr(test, assert_instr(vcvttph2uw))]
14130#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14131pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
14132 unsafe {
14133 transmute(src:vcvttph2uw_512(
14134 a,
14135 src.as_u16x32(),
14136 k,
14137 _MM_FROUND_CUR_DIRECTION,
14138 ))
14139 }
14140}
14141
14142/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14143/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14144/// mask bit is not set).
14145///
14146/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
14147#[inline]
14148#[target_feature(enable = "avx512fp16")]
14149#[cfg_attr(test, assert_instr(vcvttph2uw))]
14150#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14151pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
14152 _mm512_mask_cvttph_epu16(src:_mm512_setzero_si512(), k, a)
14153}
14154
14155/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14156/// truncation, and store the results in dst.
14157///
14158/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14159///
14160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
14161#[inline]
14162#[target_feature(enable = "avx512fp16")]
14163#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14164#[rustc_legacy_const_generics(1)]
14165#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14166pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
14167 static_assert_sae!(SAE);
14168 _mm512_mask_cvtt_roundph_epu16::<SAE>(src:_mm512_undefined_epi32(), k:0xffffffff, a)
14169}
14170
14171/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14172/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
14173/// mask bit is not set).
14174///
14175/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14176///
14177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
14178#[inline]
14179#[target_feature(enable = "avx512fp16")]
14180#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14181#[rustc_legacy_const_generics(3)]
14182#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14183pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
14184 src: __m512i,
14185 k: __mmask32,
14186 a: __m512h,
14187) -> __m512i {
14188 unsafe {
14189 static_assert_sae!(SAE);
14190 transmute(src:vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
14191 }
14192}
14193
14194/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
14195/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
14196/// mask bit is not set).
14197///
14198/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14199///
14200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
14201#[inline]
14202#[target_feature(enable = "avx512fp16")]
14203#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
14204#[rustc_legacy_const_generics(2)]
14205#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14206pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
14207 static_assert_sae!(SAE);
14208 _mm512_mask_cvtt_roundph_epu16::<SAE>(src:_mm512_setzero_si512(), k, a)
14209}
14210
14211/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14212/// results in dst.
14213///
14214/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
14215#[inline]
14216#[target_feature(enable = "avx512fp16,avx512vl")]
14217#[cfg_attr(test, assert_instr(vcvtph2dq))]
14218#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14219pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
14220 _mm_mask_cvtph_epi32(src:_mm_undefined_si128(), k:0xff, a)
14221}
14222
14223/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14224/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14225///
14226/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
14227#[inline]
14228#[target_feature(enable = "avx512fp16,avx512vl")]
14229#[cfg_attr(test, assert_instr(vcvtph2dq))]
14230#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14231pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14232 unsafe { transmute(src:vcvtph2dq_128(a, src.as_i32x4(), k)) }
14233}
14234
14235/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14236/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
14239#[inline]
14240#[target_feature(enable = "avx512fp16,avx512vl")]
14241#[cfg_attr(test, assert_instr(vcvtph2dq))]
14242#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14243pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
14244 _mm_mask_cvtph_epi32(src:_mm_setzero_si128(), k, a)
14245}
14246
14247/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14248/// results in dst.
14249///
14250/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
14251#[inline]
14252#[target_feature(enable = "avx512fp16,avx512vl")]
14253#[cfg_attr(test, assert_instr(vcvtph2dq))]
14254#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14255pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
14256 _mm256_mask_cvtph_epi32(src:_mm256_undefined_si256(), k:0xff, a)
14257}
14258
14259/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14260/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14261///
14262/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
14263#[inline]
14264#[target_feature(enable = "avx512fp16,avx512vl")]
14265#[cfg_attr(test, assert_instr(vcvtph2dq))]
14266#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14267pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14268 unsafe { transmute(src:vcvtph2dq_256(a, src.as_i32x8(), k)) }
14269}
14270
14271/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14272/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14273///
14274/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
14275#[inline]
14276#[target_feature(enable = "avx512fp16,avx512vl")]
14277#[cfg_attr(test, assert_instr(vcvtph2dq))]
14278#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14279pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
14280 _mm256_mask_cvtph_epi32(src:_mm256_setzero_si256(), k, a)
14281}
14282
14283/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14284/// results in dst.
14285///
14286/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
14287#[inline]
14288#[target_feature(enable = "avx512fp16")]
14289#[cfg_attr(test, assert_instr(vcvtph2dq))]
14290#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14291pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
14292 _mm512_mask_cvtph_epi32(src:_mm512_undefined_epi32(), k:0xffff, a)
14293}
14294
14295/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14296/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14297///
14298/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
14299#[inline]
14300#[target_feature(enable = "avx512fp16")]
14301#[cfg_attr(test, assert_instr(vcvtph2dq))]
14302#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14303pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14304 unsafe {
14305 transmute(src:vcvtph2dq_512(
14306 a,
14307 src.as_i32x16(),
14308 k,
14309 _MM_FROUND_CUR_DIRECTION,
14310 ))
14311 }
14312}
14313
14314/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14315/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14316///
14317/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
14318#[inline]
14319#[target_feature(enable = "avx512fp16")]
14320#[cfg_attr(test, assert_instr(vcvtph2dq))]
14321#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14322pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
14323 _mm512_mask_cvtph_epi32(src:_mm512_setzero_si512(), k, a)
14324}
14325
14326/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14327/// results in dst.
14328///
14329/// Rounding is done according to the rounding parameter, which can be one of:
14330///
14331/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14332/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14333/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14334/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14336///
14337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
14338#[inline]
14339#[target_feature(enable = "avx512fp16")]
14340#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14341#[rustc_legacy_const_generics(1)]
14342#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14343pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14344 static_assert_rounding!(ROUNDING);
14345 _mm512_mask_cvt_roundph_epi32::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xffff, a)
14346}
14347
14348/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14349/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14350///
14351/// Rounding is done according to the rounding parameter, which can be one of:
14352///
14353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14358///
14359/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
14360#[inline]
14361#[target_feature(enable = "avx512fp16")]
14362#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14363#[rustc_legacy_const_generics(3)]
14364#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14365pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
14366 src: __m512i,
14367 k: __mmask16,
14368 a: __m256h,
14369) -> __m512i {
14370 unsafe {
14371 static_assert_rounding!(ROUNDING);
14372 transmute(src:vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
14373 }
14374}
14375
14376/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14377/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14378///
14379/// Rounding is done according to the rounding parameter, which can be one of:
14380///
14381/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14382/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14383/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14384/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14385/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14386///
14387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
14388#[inline]
14389#[target_feature(enable = "avx512fp16")]
14390#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
14391#[rustc_legacy_const_generics(2)]
14392#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14393pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14394 static_assert_rounding!(ROUNDING);
14395 _mm512_mask_cvt_roundph_epi32::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
14396}
14397
14398/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14399/// the result in dst.
14400///
14401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
14402#[inline]
14403#[target_feature(enable = "avx512fp16")]
14404#[cfg_attr(test, assert_instr(vcvtsh2si))]
14405#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14406pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
14407 unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14408}
14409
14410/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
14411/// the result in dst.
14412///
14413/// Rounding is done according to the rounding parameter, which can be one of:
14414///
14415/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14416/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14417/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14418/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14419/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14420///
14421/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
14422#[inline]
14423#[target_feature(enable = "avx512fp16")]
14424#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
14425#[rustc_legacy_const_generics(1)]
14426#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14427pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
14428 unsafe {
14429 static_assert_rounding!(ROUNDING);
14430 vcvtsh2si32(a, ROUNDING)
14431 }
14432}
14433
14434/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
14435/// results in dst.
14436///
14437/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
14438#[inline]
14439#[target_feature(enable = "avx512fp16,avx512vl")]
14440#[cfg_attr(test, assert_instr(vcvtph2udq))]
14441#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14442pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
14443 _mm_mask_cvtph_epu32(src:_mm_undefined_si128(), k:0xff, a)
14444}
14445
14446/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14447/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14448///
14449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
14450#[inline]
14451#[target_feature(enable = "avx512fp16,avx512vl")]
14452#[cfg_attr(test, assert_instr(vcvtph2udq))]
14453#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14454pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14455 unsafe { transmute(src:vcvtph2udq_128(a, src.as_u32x4(), k)) }
14456}
14457
14458/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14459/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14460///
14461/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
14462#[inline]
14463#[target_feature(enable = "avx512fp16,avx512vl")]
14464#[cfg_attr(test, assert_instr(vcvtph2udq))]
14465#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14466pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14467 _mm_mask_cvtph_epu32(src:_mm_setzero_si128(), k, a)
14468}
14469
14470/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14471/// the results in dst.
14472///
14473/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
14474#[inline]
14475#[target_feature(enable = "avx512fp16,avx512vl")]
14476#[cfg_attr(test, assert_instr(vcvtph2udq))]
14477#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14478pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
14479 _mm256_mask_cvtph_epu32(src:_mm256_undefined_si256(), k:0xff, a)
14480}
14481
14482/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14483/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14484///
14485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
14486#[inline]
14487#[target_feature(enable = "avx512fp16,avx512vl")]
14488#[cfg_attr(test, assert_instr(vcvtph2udq))]
14489#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14490pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14491 unsafe { transmute(src:vcvtph2udq_256(a, src.as_u32x8(), k)) }
14492}
14493
14494/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14495/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14496///
14497/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
14498#[inline]
14499#[target_feature(enable = "avx512fp16,avx512vl")]
14500#[cfg_attr(test, assert_instr(vcvtph2udq))]
14501#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14502pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14503 _mm256_mask_cvtph_epu32(src:_mm256_setzero_si256(), k, a)
14504}
14505
14506/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14507/// the results in dst.
14508///
14509/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
14510#[inline]
14511#[target_feature(enable = "avx512fp16")]
14512#[cfg_attr(test, assert_instr(vcvtph2udq))]
14513#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14514pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
14515 _mm512_mask_cvtph_epu32(src:_mm512_undefined_epi32(), k:0xffff, a)
14516}
14517
14518/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14519/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14520///
14521/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
14522#[inline]
14523#[target_feature(enable = "avx512fp16")]
14524#[cfg_attr(test, assert_instr(vcvtph2udq))]
14525#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14526pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14527 unsafe {
14528 transmute(src:vcvtph2udq_512(
14529 a,
14530 src.as_u32x16(),
14531 k,
14532 _MM_FROUND_CUR_DIRECTION,
14533 ))
14534 }
14535}
14536
14537/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14538/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14539///
14540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
14541#[inline]
14542#[target_feature(enable = "avx512fp16")]
14543#[cfg_attr(test, assert_instr(vcvtph2udq))]
14544#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14545pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14546 _mm512_mask_cvtph_epu32(src:_mm512_setzero_si512(), k, a)
14547}
14548
14549/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14550/// the results in dst.
14551///
14552/// Rounding is done according to the rounding parameter, which can be one of:
14553///
14554/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14555/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14556/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14557/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14558/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14559///
14560/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
14561#[inline]
14562#[target_feature(enable = "avx512fp16")]
14563#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
14564#[rustc_legacy_const_generics(1)]
14565#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14566pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
14567 static_assert_rounding!(ROUNDING);
14568 _mm512_mask_cvt_roundph_epu32::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xffff, a)
14569}
14570
14571/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14572/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14573///
14574/// Rounding is done according to the rounding parameter, which can be one of:
14575///
14576/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14577/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14578/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14579/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14580/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14581///
14582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
14583#[inline]
14584#[target_feature(enable = "avx512fp16")]
14585#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
14586#[rustc_legacy_const_generics(3)]
14587#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14588pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
14589 src: __m512i,
14590 k: __mmask16,
14591 a: __m256h,
14592) -> __m512i {
14593 unsafe {
14594 static_assert_rounding!(ROUNDING);
14595 transmute(src:vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
14596 }
14597}
14598
14599/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
14600/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14601///
14602/// Rounding is done according to the rounding parameter, which can be one of:
14603///
14604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14609///
14610/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
14611#[inline]
14612#[target_feature(enable = "avx512fp16")]
14613#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
14614#[rustc_legacy_const_generics(2)]
14615#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14616pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
14617 static_assert_rounding!(ROUNDING);
14618 _mm512_mask_cvt_roundph_epu32::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
14619}
14620
14621/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
14622/// the result in dst.
14623///
14624/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
14625#[inline]
14626#[target_feature(enable = "avx512fp16")]
14627#[cfg_attr(test, assert_instr(vcvtsh2usi))]
14628#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14629pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
14630 unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
14631}
14632
14633/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
14634/// the result in dst.
14635///
14636/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14637///
14638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
14639#[inline]
14640#[target_feature(enable = "avx512fp16")]
14641#[cfg_attr(test, assert_instr(vcvtsh2usi, SAE = 8))]
14642#[rustc_legacy_const_generics(1)]
14643#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14644pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
14645 unsafe {
14646 static_assert_rounding!(SAE);
14647 vcvtsh2usi32(a, SAE)
14648 }
14649}
14650
14651/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14652/// store the results in dst.
14653///
14654/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
14655#[inline]
14656#[target_feature(enable = "avx512fp16,avx512vl")]
14657#[cfg_attr(test, assert_instr(vcvttph2dq))]
14658#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14659pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
14660 _mm_mask_cvttph_epi32(src:_mm_undefined_si128(), k:0xff, a)
14661}
14662
14663/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14664/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14665///
14666/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
14667#[inline]
14668#[target_feature(enable = "avx512fp16,avx512vl")]
14669#[cfg_attr(test, assert_instr(vcvttph2dq))]
14670#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14671pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14672 unsafe { transmute(src:vcvttph2dq_128(a, src.as_i32x4(), k)) }
14673}
14674
14675/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14676/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14677///
14678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
14679#[inline]
14680#[target_feature(enable = "avx512fp16,avx512vl")]
14681#[cfg_attr(test, assert_instr(vcvttph2dq))]
14682#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14683pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
14684 _mm_mask_cvttph_epi32(src:_mm_setzero_si128(), k, a)
14685}
14686
14687/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14688/// store the results in dst.
14689///
14690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
14691#[inline]
14692#[target_feature(enable = "avx512fp16,avx512vl")]
14693#[cfg_attr(test, assert_instr(vcvttph2dq))]
14694#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14695pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
14696 _mm256_mask_cvttph_epi32(src:_mm256_undefined_si256(), k:0xff, a)
14697}
14698
14699/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14700/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14701///
14702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
14703#[inline]
14704#[target_feature(enable = "avx512fp16,avx512vl")]
14705#[cfg_attr(test, assert_instr(vcvttph2dq))]
14706#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14707pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14708 unsafe { transmute(src:vcvttph2dq_256(a, src.as_i32x8(), k)) }
14709}
14710
14711/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14712/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14713///
14714/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
14715#[inline]
14716#[target_feature(enable = "avx512fp16,avx512vl")]
14717#[cfg_attr(test, assert_instr(vcvttph2dq))]
14718#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14719pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
14720 _mm256_mask_cvttph_epi32(src:_mm256_setzero_si256(), k, a)
14721}
14722
14723/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14724/// store the results in dst.
14725///
14726/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
14727#[inline]
14728#[target_feature(enable = "avx512fp16")]
14729#[cfg_attr(test, assert_instr(vcvttph2dq))]
14730#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14731pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
14732 _mm512_mask_cvttph_epi32(src:_mm512_undefined_epi32(), k:0xffff, a)
14733}
14734
14735/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14736/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14737///
14738/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
14739#[inline]
14740#[target_feature(enable = "avx512fp16")]
14741#[cfg_attr(test, assert_instr(vcvttph2dq))]
14742#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14743pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14744 unsafe {
14745 transmute(src:vcvttph2dq_512(
14746 a,
14747 src.as_i32x16(),
14748 k,
14749 _MM_FROUND_CUR_DIRECTION,
14750 ))
14751 }
14752}
14753
14754/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14755/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14756///
14757/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
14758#[inline]
14759#[target_feature(enable = "avx512fp16")]
14760#[cfg_attr(test, assert_instr(vcvttph2dq))]
14761#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14762pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
14763 _mm512_mask_cvttph_epi32(src:_mm512_setzero_si512(), k, a)
14764}
14765
14766/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14767/// store the results in dst.
14768///
14769/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14770///
14771/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
14772#[inline]
14773#[target_feature(enable = "avx512fp16")]
14774#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
14775#[rustc_legacy_const_generics(1)]
14776#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14777pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
14778 static_assert_sae!(SAE);
14779 _mm512_mask_cvtt_roundph_epi32::<SAE>(src:_mm512_undefined_epi32(), k:0xffff, a)
14780}
14781
14782/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14783/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14784///
14785/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14786///
14787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
14788#[inline]
14789#[target_feature(enable = "avx512fp16")]
14790#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
14791#[rustc_legacy_const_generics(3)]
14792#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14793pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
14794 src: __m512i,
14795 k: __mmask16,
14796 a: __m256h,
14797) -> __m512i {
14798 unsafe {
14799 static_assert_sae!(SAE);
14800 transmute(src:vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
14801 }
14802}
14803
14804/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
14805/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14806///
14807/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14808///
14809/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
14810#[inline]
14811#[target_feature(enable = "avx512fp16")]
14812#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
14813#[rustc_legacy_const_generics(2)]
14814#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14815pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
14816 static_assert_sae!(SAE);
14817 _mm512_mask_cvtt_roundph_epi32::<SAE>(src:_mm512_setzero_si512(), k, a)
14818}
14819
14820/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
14821/// the result in dst.
14822///
14823/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
14824#[inline]
14825#[target_feature(enable = "avx512fp16")]
14826#[cfg_attr(test, assert_instr(vcvttsh2si))]
14827#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14828pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
14829 unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
14830}
14831
14832/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
14833/// the result in dst.
14834///
14835/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14836///
14837/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
14838#[inline]
14839#[target_feature(enable = "avx512fp16")]
14840#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
14841#[rustc_legacy_const_generics(1)]
14842#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14843pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
14844 unsafe {
14845 static_assert_sae!(SAE);
14846 vcvttsh2si32(a, SAE)
14847 }
14848}
14849
14850/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14851/// store the results in dst.
14852///
14853/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
14854#[inline]
14855#[target_feature(enable = "avx512fp16,avx512vl")]
14856#[cfg_attr(test, assert_instr(vcvttph2udq))]
14857#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14858pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
14859 _mm_mask_cvttph_epu32(src:_mm_undefined_si128(), k:0xff, a)
14860}
14861
14862/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14863/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14864///
14865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
14866#[inline]
14867#[target_feature(enable = "avx512fp16,avx512vl")]
14868#[cfg_attr(test, assert_instr(vcvttph2udq))]
14869#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14870pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
14871 unsafe { transmute(src:vcvttph2udq_128(a, src.as_u32x4(), k)) }
14872}
14873
14874/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14875/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14876///
14877/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
14878#[inline]
14879#[target_feature(enable = "avx512fp16,avx512vl")]
14880#[cfg_attr(test, assert_instr(vcvttph2udq))]
14881#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14882pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
14883 _mm_mask_cvttph_epu32(src:_mm_setzero_si128(), k, a)
14884}
14885
14886/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14887/// store the results in dst.
14888///
14889/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
14890#[inline]
14891#[target_feature(enable = "avx512fp16,avx512vl")]
14892#[cfg_attr(test, assert_instr(vcvttph2udq))]
14893#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14894pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
14895 _mm256_mask_cvttph_epu32(src:_mm256_undefined_si256(), k:0xff, a)
14896}
14897
14898/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14899/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14900///
14901/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
14902#[inline]
14903#[target_feature(enable = "avx512fp16,avx512vl")]
14904#[cfg_attr(test, assert_instr(vcvttph2udq))]
14905#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14906pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
14907 unsafe { transmute(src:vcvttph2udq_256(a, src.as_u32x8(), k)) }
14908}
14909
14910/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14911/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14912///
14913/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
14914#[inline]
14915#[target_feature(enable = "avx512fp16,avx512vl")]
14916#[cfg_attr(test, assert_instr(vcvttph2udq))]
14917#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14918pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
14919 _mm256_mask_cvttph_epu32(src:_mm256_setzero_si256(), k, a)
14920}
14921
14922/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14923/// store the results in dst.
14924///
14925/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
14926#[inline]
14927#[target_feature(enable = "avx512fp16")]
14928#[cfg_attr(test, assert_instr(vcvttph2udq))]
14929#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14930pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
14931 _mm512_mask_cvttph_epu32(src:_mm512_undefined_epi32(), k:0xffff, a)
14932}
14933
14934/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14935/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14936///
14937/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
14938#[inline]
14939#[target_feature(enable = "avx512fp16")]
14940#[cfg_attr(test, assert_instr(vcvttph2udq))]
14941#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14942pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
14943 unsafe {
14944 transmute(src:vcvttph2udq_512(
14945 a,
14946 src.as_u32x16(),
14947 k,
14948 _MM_FROUND_CUR_DIRECTION,
14949 ))
14950 }
14951}
14952
14953/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14954/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14955///
14956/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
14957#[inline]
14958#[target_feature(enable = "avx512fp16")]
14959#[cfg_attr(test, assert_instr(vcvttph2udq))]
14960#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14961pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
14962 _mm512_mask_cvttph_epu32(src:_mm512_setzero_si512(), k, a)
14963}
14964
14965/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14966/// store the results in dst.
14967///
14968/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14969///
14970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
14971#[inline]
14972#[target_feature(enable = "avx512fp16")]
14973#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14974#[rustc_legacy_const_generics(1)]
14975#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14976pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
14977 static_assert_sae!(SAE);
14978 _mm512_mask_cvtt_roundph_epu32::<SAE>(src:_mm512_undefined_epi32(), k:0xffff, a)
14979}
14980
14981/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
14982/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14983///
14984/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
14985///
14986/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
14987#[inline]
14988#[target_feature(enable = "avx512fp16")]
14989#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
14990#[rustc_legacy_const_generics(3)]
14991#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14992pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
14993 src: __m512i,
14994 k: __mmask16,
14995 a: __m256h,
14996) -> __m512i {
14997 unsafe {
14998 static_assert_sae!(SAE);
14999 transmute(src:vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
15000 }
15001}
15002
15003/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
15004/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15005///
15006/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15007///
15008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
15009#[inline]
15010#[target_feature(enable = "avx512fp16")]
15011#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
15012#[rustc_legacy_const_generics(2)]
15013#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15014pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
15015 static_assert_sae!(SAE);
15016 _mm512_mask_cvtt_roundph_epu32::<SAE>(src:_mm512_setzero_si512(), k, a)
15017}
15018
15019/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
15020/// the result in dst.
15021///
15022/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
15023#[inline]
15024#[target_feature(enable = "avx512fp16")]
15025#[cfg_attr(test, assert_instr(vcvttsh2usi))]
15026#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15027pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
15028 unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
15029}
15030
15031/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
15032/// the result in dst.
15033///
15034/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
15035///
15036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
15037#[inline]
15038#[target_feature(enable = "avx512fp16")]
15039#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
15040#[rustc_legacy_const_generics(1)]
15041#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15042pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
15043 unsafe {
15044 static_assert_sae!(SAE);
15045 vcvttsh2usi32(a, SAE)
15046 }
15047}
15048
15049/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15050/// store the results in dst.
15051///
15052/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
15053#[inline]
15054#[target_feature(enable = "avx512fp16,avx512vl")]
15055#[cfg_attr(test, assert_instr(vcvtph2qq))]
15056#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15057pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
15058 _mm_mask_cvtph_epi64(src:_mm_undefined_si128(), k:0xff, a)
15059}
15060
15061/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15062/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15063///
15064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
15065#[inline]
15066#[target_feature(enable = "avx512fp16,avx512vl")]
15067#[cfg_attr(test, assert_instr(vcvtph2qq))]
15068#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15069pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15070 unsafe { transmute(src:vcvtph2qq_128(a, src.as_i64x2(), k)) }
15071}
15072
15073/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15074/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15075///
15076/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
15077#[inline]
15078#[target_feature(enable = "avx512fp16,avx512vl")]
15079#[cfg_attr(test, assert_instr(vcvtph2qq))]
15080#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15081pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
15082 _mm_mask_cvtph_epi64(src:_mm_setzero_si128(), k, a)
15083}
15084
15085/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15086/// store the results in dst.
15087///
15088/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
15089#[inline]
15090#[target_feature(enable = "avx512fp16,avx512vl")]
15091#[cfg_attr(test, assert_instr(vcvtph2qq))]
15092#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15093pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
15094 _mm256_mask_cvtph_epi64(src:_mm256_undefined_si256(), k:0xff, a)
15095}
15096
15097/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15098/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15099///
15100/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
15101#[inline]
15102#[target_feature(enable = "avx512fp16,avx512vl")]
15103#[cfg_attr(test, assert_instr(vcvtph2qq))]
15104#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15105pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15106 unsafe { transmute(src:vcvtph2qq_256(a, src.as_i64x4(), k)) }
15107}
15108
15109/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15110/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15111///
15112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
15113#[inline]
15114#[target_feature(enable = "avx512fp16,avx512vl")]
15115#[cfg_attr(test, assert_instr(vcvtph2qq))]
15116#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15117pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15118 _mm256_mask_cvtph_epi64(src:_mm256_setzero_si256(), k, a)
15119}
15120
15121/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15122/// store the results in dst.
15123///
15124/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
15125#[inline]
15126#[target_feature(enable = "avx512fp16")]
15127#[cfg_attr(test, assert_instr(vcvtph2qq))]
15128#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15129pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
15130 _mm512_mask_cvtph_epi64(src:_mm512_undefined_epi32(), k:0xff, a)
15131}
15132
15133/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15134/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15135///
15136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
15137#[inline]
15138#[target_feature(enable = "avx512fp16")]
15139#[cfg_attr(test, assert_instr(vcvtph2qq))]
15140#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15141pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15142 unsafe {
15143 transmute(src:vcvtph2qq_512(
15144 a,
15145 src.as_i64x8(),
15146 k,
15147 _MM_FROUND_CUR_DIRECTION,
15148 ))
15149 }
15150}
15151
15152/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15153/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15154///
15155/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
15156#[inline]
15157#[target_feature(enable = "avx512fp16")]
15158#[cfg_attr(test, assert_instr(vcvtph2qq))]
15159#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15160pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15161 _mm512_mask_cvtph_epi64(src:_mm512_setzero_si512(), k, a)
15162}
15163
15164/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15165/// store the results in dst.
15166///
15167/// Rounding is done according to the rounding parameter, which can be one of:
15168///
15169/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15170/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15171/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15172/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15173/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15174///
15175/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
15176#[inline]
15177#[target_feature(enable = "avx512fp16")]
15178#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15179#[rustc_legacy_const_generics(1)]
15180#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15181pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15182 static_assert_rounding!(ROUNDING);
15183 _mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xff, a)
15184}
15185
15186/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15187/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15188///
15189/// Rounding is done according to the rounding parameter, which can be one of:
15190///
15191/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15192/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15193/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15194/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15196///
15197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
15198#[inline]
15199#[target_feature(enable = "avx512fp16")]
15200#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15201#[rustc_legacy_const_generics(3)]
15202#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15203pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
15204 src: __m512i,
15205 k: __mmask8,
15206 a: __m128h,
15207) -> __m512i {
15208 unsafe {
15209 static_assert_rounding!(ROUNDING);
15210 transmute(src:vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
15211 }
15212}
15213
15214/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
15215/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15216///
15217/// Rounding is done according to the rounding parameter, which can be one of:
15218///
15219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15224///
15225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
15226#[inline]
15227#[target_feature(enable = "avx512fp16")]
15228#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
15229#[rustc_legacy_const_generics(2)]
15230#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15231pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15232 static_assert_rounding!(ROUNDING);
15233 _mm512_mask_cvt_roundph_epi64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15234}
15235
15236/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15237/// store the results in dst.
15238///
15239/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
15240#[inline]
15241#[target_feature(enable = "avx512fp16,avx512vl")]
15242#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15243#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15244pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
15245 _mm_mask_cvtph_epu64(src:_mm_undefined_si128(), k:0xff, a)
15246}
15247
15248/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15249/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15250///
15251/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
15252#[inline]
15253#[target_feature(enable = "avx512fp16,avx512vl")]
15254#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15255#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15256pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15257 unsafe { transmute(src:vcvtph2uqq_128(a, src.as_u64x2(), k)) }
15258}
15259
15260/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15261/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15262///
15263/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
15264#[inline]
15265#[target_feature(enable = "avx512fp16,avx512vl")]
15266#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15267#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15268pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15269 _mm_mask_cvtph_epu64(src:_mm_setzero_si128(), k, a)
15270}
15271
15272/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15273/// store the results in dst.
15274///
15275/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
15276#[inline]
15277#[target_feature(enable = "avx512fp16,avx512vl")]
15278#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15279#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15280pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
15281 _mm256_mask_cvtph_epu64(src:_mm256_undefined_si256(), k:0xff, a)
15282}
15283
15284/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15285/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15286///
15287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
15288#[inline]
15289#[target_feature(enable = "avx512fp16,avx512vl")]
15290#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15291#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15292pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15293 unsafe { transmute(src:vcvtph2uqq_256(a, src.as_u64x4(), k)) }
15294}
15295
15296/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15297/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15298///
15299/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
15300#[inline]
15301#[target_feature(enable = "avx512fp16,avx512vl")]
15302#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15303#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15304pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15305 _mm256_mask_cvtph_epu64(src:_mm256_setzero_si256(), k, a)
15306}
15307
15308/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15309/// store the results in dst.
15310///
15311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
15312#[inline]
15313#[target_feature(enable = "avx512fp16")]
15314#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15315#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15316pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
15317 _mm512_mask_cvtph_epu64(src:_mm512_undefined_epi32(), k:0xff, a)
15318}
15319
15320/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15321/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15322///
15323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
15324#[inline]
15325#[target_feature(enable = "avx512fp16")]
15326#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15327#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15328pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15329 unsafe {
15330 transmute(src:vcvtph2uqq_512(
15331 a,
15332 src.as_u64x8(),
15333 k,
15334 _MM_FROUND_CUR_DIRECTION,
15335 ))
15336 }
15337}
15338
15339/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15340/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15341///
15342/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
15343#[inline]
15344#[target_feature(enable = "avx512fp16")]
15345#[cfg_attr(test, assert_instr(vcvtph2uqq))]
15346#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15347pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15348 _mm512_mask_cvtph_epu64(src:_mm512_setzero_si512(), k, a)
15349}
15350
15351/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15352/// store the results in dst.
15353///
15354/// Rounding is done according to the rounding parameter, which can be one of:
15355///
15356/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15357/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15358/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15359/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15360/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15361///
15362/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
15363#[inline]
15364#[target_feature(enable = "avx512fp16")]
15365#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15366#[rustc_legacy_const_generics(1)]
15367#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15368pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
15369 static_assert_rounding!(ROUNDING);
15370 _mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_undefined_epi32(), k:0xff, a)
15371}
15372
15373/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15374/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15375///
15376/// Rounding is done according to the rounding parameter, which can be one of:
15377///
15378/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15379/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15380/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15381/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15382/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15383///
15384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
15385#[inline]
15386#[target_feature(enable = "avx512fp16")]
15387#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15388#[rustc_legacy_const_generics(3)]
15389#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15390pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
15391 src: __m512i,
15392 k: __mmask8,
15393 a: __m128h,
15394) -> __m512i {
15395 unsafe {
15396 static_assert_rounding!(ROUNDING);
15397 transmute(src:vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
15398 }
15399}
15400
15401/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
15402/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15403///
15404/// Rounding is done according to the rounding parameter, which can be one of:
15405///
15406/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15407/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15408/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15409/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15410/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15411///
15412/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
15413#[inline]
15414#[target_feature(enable = "avx512fp16")]
15415#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
15416#[rustc_legacy_const_generics(2)]
15417#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15418pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
15419 static_assert_rounding!(ROUNDING);
15420 _mm512_mask_cvt_roundph_epu64::<ROUNDING>(src:_mm512_setzero_si512(), k, a)
15421}
15422
15423/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15424/// store the results in dst.
15425///
15426/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
15427#[inline]
15428#[target_feature(enable = "avx512fp16,avx512vl")]
15429#[cfg_attr(test, assert_instr(vcvttph2qq))]
15430#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15431pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
15432 _mm_mask_cvttph_epi64(src:_mm_undefined_si128(), k:0xff, a)
15433}
15434
15435/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15436/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15437///
15438/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
15439#[inline]
15440#[target_feature(enable = "avx512fp16,avx512vl")]
15441#[cfg_attr(test, assert_instr(vcvttph2qq))]
15442#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15443pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15444 unsafe { transmute(src:vcvttph2qq_128(a, src.as_i64x2(), k)) }
15445}
15446
15447/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15448/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15449///
15450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
15451#[inline]
15452#[target_feature(enable = "avx512fp16,avx512vl")]
15453#[cfg_attr(test, assert_instr(vcvttph2qq))]
15454#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15455pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
15456 _mm_mask_cvttph_epi64(src:_mm_setzero_si128(), k, a)
15457}
15458
15459/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15460/// store the results in dst.
15461///
15462/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
15463#[inline]
15464#[target_feature(enable = "avx512fp16,avx512vl")]
15465#[cfg_attr(test, assert_instr(vcvttph2qq))]
15466#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15467pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
15468 _mm256_mask_cvttph_epi64(src:_mm256_undefined_si256(), k:0xff, a)
15469}
15470
15471/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15472/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15473///
15474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
15475#[inline]
15476#[target_feature(enable = "avx512fp16,avx512vl")]
15477#[cfg_attr(test, assert_instr(vcvttph2qq))]
15478#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15479pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15480 unsafe { transmute(src:vcvttph2qq_256(a, src.as_i64x4(), k)) }
15481}
15482
15483/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15484/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15485///
15486/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
15487#[inline]
15488#[target_feature(enable = "avx512fp16,avx512vl")]
15489#[cfg_attr(test, assert_instr(vcvttph2qq))]
15490#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15491pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
15492 _mm256_mask_cvttph_epi64(src:_mm256_setzero_si256(), k, a)
15493}
15494
15495/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15496/// store the results in dst.
15497///
15498/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
15499#[inline]
15500#[target_feature(enable = "avx512fp16")]
15501#[cfg_attr(test, assert_instr(vcvttph2qq))]
15502#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15503pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
15504 _mm512_mask_cvttph_epi64(src:_mm512_undefined_epi32(), k:0xff, a)
15505}
15506
15507/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15508/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15509///
15510/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
15511#[inline]
15512#[target_feature(enable = "avx512fp16")]
15513#[cfg_attr(test, assert_instr(vcvttph2qq))]
15514#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15515pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15516 unsafe {
15517 transmute(src:vcvttph2qq_512(
15518 a,
15519 src.as_i64x8(),
15520 k,
15521 _MM_FROUND_CUR_DIRECTION,
15522 ))
15523 }
15524}
15525
15526/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15527/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15528///
15529/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
15530#[inline]
15531#[target_feature(enable = "avx512fp16")]
15532#[cfg_attr(test, assert_instr(vcvttph2qq))]
15533#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15534pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
15535 _mm512_mask_cvttph_epi64(src:_mm512_setzero_si512(), k, a)
15536}
15537
15538/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15539/// store the results in dst.
15540///
15541/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15542///
15543/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
15544#[inline]
15545#[target_feature(enable = "avx512fp16")]
15546#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15547#[rustc_legacy_const_generics(1)]
15548#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15549pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
15550 static_assert_sae!(SAE);
15551 _mm512_mask_cvtt_roundph_epi64::<SAE>(src:_mm512_undefined_epi32(), k:0xff, a)
15552}
15553
15554/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15555/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15556///
15557/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15558///
15559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
15560#[inline]
15561#[target_feature(enable = "avx512fp16")]
15562#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15563#[rustc_legacy_const_generics(3)]
15564#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15565pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
15566 src: __m512i,
15567 k: __mmask8,
15568 a: __m128h,
15569) -> __m512i {
15570 unsafe {
15571 static_assert_sae!(SAE);
15572 transmute(src:vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
15573 }
15574}
15575
15576/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
15577/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15578///
15579/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15580///
15581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
15582#[inline]
15583#[target_feature(enable = "avx512fp16")]
15584#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
15585#[rustc_legacy_const_generics(2)]
15586#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15587pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15588 static_assert_sae!(SAE);
15589 _mm512_mask_cvtt_roundph_epi64::<SAE>(src:_mm512_setzero_si512(), k, a)
15590}
15591
15592/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15593/// store the results in dst.
15594///
15595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
15596#[inline]
15597#[target_feature(enable = "avx512fp16,avx512vl")]
15598#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15599#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15600pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
15601 _mm_mask_cvttph_epu64(src:_mm_undefined_si128(), k:0xff, a)
15602}
15603
15604/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15605/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15606///
15607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
15608#[inline]
15609#[target_feature(enable = "avx512fp16,avx512vl")]
15610#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15611#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15612pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
15613 unsafe { transmute(src:vcvttph2uqq_128(a, src.as_u64x2(), k)) }
15614}
15615
15616/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15617/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15618///
15619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
15620#[inline]
15621#[target_feature(enable = "avx512fp16,avx512vl")]
15622#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15623#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15624pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
15625 _mm_mask_cvttph_epu64(src:_mm_setzero_si128(), k, a)
15626}
15627
15628/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15629/// store the results in dst.
15630///
15631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
15632#[inline]
15633#[target_feature(enable = "avx512fp16,avx512vl")]
15634#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15635#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15636pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
15637 _mm256_mask_cvttph_epu64(src:_mm256_undefined_si256(), k:0xff, a)
15638}
15639
15640/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15641/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15642///
15643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
15644#[inline]
15645#[target_feature(enable = "avx512fp16,avx512vl")]
15646#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15647#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15648pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
15649 unsafe { transmute(src:vcvttph2uqq_256(a, src.as_u64x4(), k)) }
15650}
15651
15652/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15653/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15654///
15655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
15656#[inline]
15657#[target_feature(enable = "avx512fp16,avx512vl")]
15658#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15659#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15660pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
15661 _mm256_mask_cvttph_epu64(src:_mm256_setzero_si256(), k, a)
15662}
15663
15664/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15665/// store the results in dst.
15666///
15667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
15668#[inline]
15669#[target_feature(enable = "avx512fp16")]
15670#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15671#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15672pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
15673 _mm512_mask_cvttph_epu64(src:_mm512_undefined_epi32(), k:0xff, a)
15674}
15675
15676/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15677/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15678///
15679/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
15680#[inline]
15681#[target_feature(enable = "avx512fp16")]
15682#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15683#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15684pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
15685 unsafe {
15686 transmute(src:vcvttph2uqq_512(
15687 a,
15688 src.as_u64x8(),
15689 k,
15690 _MM_FROUND_CUR_DIRECTION,
15691 ))
15692 }
15693}
15694
15695/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15696/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15697///
15698/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
15699#[inline]
15700#[target_feature(enable = "avx512fp16")]
15701#[cfg_attr(test, assert_instr(vcvttph2uqq))]
15702#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15703pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
15704 _mm512_mask_cvttph_epu64(src:_mm512_setzero_si512(), k, a)
15705}
15706
15707/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15708/// store the results in dst.
15709///
15710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15711///
15712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
15713#[inline]
15714#[target_feature(enable = "avx512fp16")]
15715#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15716#[rustc_legacy_const_generics(1)]
15717#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15718pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
15719 static_assert_sae!(SAE);
15720 _mm512_mask_cvtt_roundph_epu64::<SAE>(src:_mm512_undefined_epi32(), k:0xff, a)
15721}
15722
15723/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15724/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15725///
15726/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15727///
15728/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
15729#[inline]
15730#[target_feature(enable = "avx512fp16")]
15731#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15732#[rustc_legacy_const_generics(3)]
15733#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15734pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
15735 src: __m512i,
15736 k: __mmask8,
15737 a: __m128h,
15738) -> __m512i {
15739 unsafe {
15740 static_assert_sae!(SAE);
15741 transmute(src:vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
15742 }
15743}
15744
15745/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
15746/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15747///
15748/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15749///
15750/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
15751#[inline]
15752#[target_feature(enable = "avx512fp16")]
15753#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
15754#[rustc_legacy_const_generics(2)]
15755#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15756pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
15757 static_assert_sae!(SAE);
15758 _mm512_mask_cvtt_roundph_epu64::<SAE>(src:_mm512_setzero_si512(), k, a)
15759}
15760
15761/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15762/// floating-point elements, and store the results in dst.
15763///
15764/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
15765#[inline]
15766#[target_feature(enable = "avx512fp16,avx512vl")]
15767#[cfg_attr(test, assert_instr(vcvtph2psx))]
15768#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15769pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
15770 _mm_mask_cvtxph_ps(src:_mm_setzero_ps(), k:0xff, a)
15771}
15772
15773/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15774/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15775/// dst when the corresponding mask bit is not set).
15776///
15777/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
15778#[inline]
15779#[target_feature(enable = "avx512fp16,avx512vl")]
15780#[cfg_attr(test, assert_instr(vcvtph2psx))]
15781#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15782pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
15783 unsafe { vcvtph2psx_128(a, src, k) }
15784}
15785
15786/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15787/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15788/// corresponding mask bit is not set).
15789///
15790/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
15791#[inline]
15792#[target_feature(enable = "avx512fp16,avx512vl")]
15793#[cfg_attr(test, assert_instr(vcvtph2psx))]
15794#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15795pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
15796 _mm_mask_cvtxph_ps(src:_mm_setzero_ps(), k, a)
15797}
15798
15799/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15800/// floating-point elements, and store the results in dst.
15801///
15802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
15803#[inline]
15804#[target_feature(enable = "avx512fp16,avx512vl")]
15805#[cfg_attr(test, assert_instr(vcvtph2psx))]
15806#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15807pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
15808 _mm256_mask_cvtxph_ps(src:_mm256_setzero_ps(), k:0xff, a)
15809}
15810
15811/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15812/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15813/// dst when the corresponding mask bit is not set).
15814///
15815/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
15816#[inline]
15817#[target_feature(enable = "avx512fp16,avx512vl")]
15818#[cfg_attr(test, assert_instr(vcvtph2psx))]
15819#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15820pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
15821 unsafe { vcvtph2psx_256(a, src, k) }
15822}
15823
15824/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15825/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15826/// corresponding mask bit is not set).
15827///
15828/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
15829#[inline]
15830#[target_feature(enable = "avx512fp16,avx512vl")]
15831#[cfg_attr(test, assert_instr(vcvtph2psx))]
15832#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15833pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
15834 _mm256_mask_cvtxph_ps(src:_mm256_setzero_ps(), k, a)
15835}
15836
15837/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15838/// floating-point elements, and store the results in dst.
15839///
15840/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
15841#[inline]
15842#[target_feature(enable = "avx512fp16")]
15843#[cfg_attr(test, assert_instr(vcvtph2psx))]
15844#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15845pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
15846 _mm512_mask_cvtxph_ps(src:_mm512_setzero_ps(), k:0xffff, a)
15847}
15848
15849/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15850/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15851/// dst when the corresponding mask bit is not set).
15852///
15853/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
15854#[inline]
15855#[target_feature(enable = "avx512fp16")]
15856#[cfg_attr(test, assert_instr(vcvtph2psx))]
15857#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15858pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
15859 unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
15860}
15861
15862/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15863/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15864/// corresponding mask bit is not set).
15865///
15866/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
15867#[inline]
15868#[target_feature(enable = "avx512fp16")]
15869#[cfg_attr(test, assert_instr(vcvtph2psx))]
15870#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15871pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
15872 _mm512_mask_cvtxph_ps(src:_mm512_setzero_ps(), k, a)
15873}
15874
15875/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15876/// floating-point elements, and store the results in dst.
15877///
15878/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15879///
15880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
15881#[inline]
15882#[target_feature(enable = "avx512fp16")]
15883#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15884#[rustc_legacy_const_generics(1)]
15885#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15886pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
15887 static_assert_sae!(SAE);
15888 _mm512_mask_cvtx_roundph_ps::<SAE>(src:_mm512_setzero_ps(), k:0xffff, a)
15889}
15890
15891/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15892/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
15893/// dst when the corresponding mask bit is not set).
15894///
15895/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15896///
15897/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
15898#[inline]
15899#[target_feature(enable = "avx512fp16")]
15900#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15901#[rustc_legacy_const_generics(3)]
15902#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15903pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
15904 src: __m512,
15905 k: __mmask16,
15906 a: __m256h,
15907) -> __m512 {
15908 unsafe {
15909 static_assert_sae!(SAE);
15910 vcvtph2psx_512(a, src, k, SAE)
15911 }
15912}
15913
15914/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
15915/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
15916/// corresponding mask bit is not set).
15917///
15918/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15919///
15920/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
15921#[inline]
15922#[target_feature(enable = "avx512fp16")]
15923#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
15924#[rustc_legacy_const_generics(2)]
15925#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15926pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
15927 static_assert_sae!(SAE);
15928 _mm512_mask_cvtx_roundph_ps::<SAE>(src:_mm512_setzero_ps(), k, a)
15929}
15930
15931/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15932/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed
15933/// elements from a to the upper elements of dst.
15934///
15935/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
15936#[inline]
15937#[target_feature(enable = "avx512fp16")]
15938#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15939#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15940pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
15941 _mm_mask_cvtsh_ss(src:a, k:0xff, a, b)
15942}
15943
15944/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15945/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15946/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15947/// upper elements of dst.
15948///
15949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
15950#[inline]
15951#[target_feature(enable = "avx512fp16")]
15952#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15953#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15954pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15955 unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
15956}
15957
15958/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15959/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
15960/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
15961/// of dst.
15962///
15963/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
15964#[inline]
15965#[target_feature(enable = "avx512fp16")]
15966#[cfg_attr(test, assert_instr(vcvtsh2ss))]
15967#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15968pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
15969 _mm_mask_cvtsh_ss(src:_mm_set_ss(0.0), k, a, b)
15970}
15971
15972/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15973/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements
15974/// from a to the upper elements of dst.
15975///
15976/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15977///
15978/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
15979#[inline]
15980#[target_feature(enable = "avx512fp16")]
15981#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
15982#[rustc_legacy_const_generics(2)]
15983#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
15984pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
15985 static_assert_sae!(SAE);
15986 _mm_mask_cvt_roundsh_ss::<SAE>(src:_mm_undefined_ps(), k:0xff, a, b)
15987}
15988
15989/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
15990/// floating-point element, store the result in the lower element of dst using writemask k (the element is
15991/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
15992/// upper elements of dst.
15993///
15994/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15995///
15996/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
15997#[inline]
15998#[target_feature(enable = "avx512fp16")]
15999#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
16000#[rustc_legacy_const_generics(4)]
16001#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16002pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
16003 src: __m128,
16004 k: __mmask8,
16005 a: __m128,
16006 b: __m128h,
16007) -> __m128 {
16008 unsafe {
16009 static_assert_sae!(SAE);
16010 vcvtsh2ss(a, b, src, k, SAE)
16011 }
16012}
16013
16014/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
16015/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16016/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
16017/// of dst.
16018///
16019/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16020///
16021/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
16022#[inline]
16023#[target_feature(enable = "avx512fp16")]
16024#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
16025#[rustc_legacy_const_generics(3)]
16026#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16027pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
16028 static_assert_sae!(SAE);
16029 _mm_mask_cvt_roundsh_ss::<SAE>(src:_mm_set_ss(0.0), k, a, b)
16030}
16031
16032/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16033/// floating-point elements, and store the results in dst.
16034///
16035/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
16036#[inline]
16037#[target_feature(enable = "avx512fp16,avx512vl")]
16038#[cfg_attr(test, assert_instr(vcvtph2pd))]
16039#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16040pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
16041 _mm_mask_cvtph_pd(src:_mm_setzero_pd(), k:0xff, a)
16042}
16043
16044/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16045/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16046/// dst when the corresponding mask bit is not set).
16047///
16048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
16049#[inline]
16050#[target_feature(enable = "avx512fp16,avx512vl")]
16051#[cfg_attr(test, assert_instr(vcvtph2pd))]
16052#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16053pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
16054 unsafe { vcvtph2pd_128(a, src, k) }
16055}
16056
16057/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16058/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16059/// corresponding mask bit is not set).
16060///
16061/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
16062#[inline]
16063#[target_feature(enable = "avx512fp16,avx512vl")]
16064#[cfg_attr(test, assert_instr(vcvtph2pd))]
16065#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16066pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
16067 _mm_mask_cvtph_pd(src:_mm_setzero_pd(), k, a)
16068}
16069
16070/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16071/// floating-point elements, and store the results in dst.
16072///
16073/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
16074#[inline]
16075#[target_feature(enable = "avx512fp16,avx512vl")]
16076#[cfg_attr(test, assert_instr(vcvtph2pd))]
16077#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16078pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
16079 _mm256_mask_cvtph_pd(src:_mm256_setzero_pd(), k:0xff, a)
16080}
16081
16082/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16083/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16084/// dst when the corresponding mask bit is not set).
16085///
16086/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
16087#[inline]
16088#[target_feature(enable = "avx512fp16,avx512vl")]
16089#[cfg_attr(test, assert_instr(vcvtph2pd))]
16090#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16091pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
16092 unsafe { vcvtph2pd_256(a, src, k) }
16093}
16094
16095/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16096/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16097/// corresponding mask bit is not set).
16098///
16099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
16100#[inline]
16101#[target_feature(enable = "avx512fp16,avx512vl")]
16102#[cfg_attr(test, assert_instr(vcvtph2pd))]
16103#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16104pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
16105 _mm256_mask_cvtph_pd(src:_mm256_setzero_pd(), k, a)
16106}
16107
16108/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16109/// floating-point elements, and store the results in dst.
16110///
16111/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
16112#[inline]
16113#[target_feature(enable = "avx512fp16")]
16114#[cfg_attr(test, assert_instr(vcvtph2pd))]
16115#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16116pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
16117 _mm512_mask_cvtph_pd(src:_mm512_setzero_pd(), k:0xff, a)
16118}
16119
16120/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16121/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16122/// dst when the corresponding mask bit is not set).
16123///
16124/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
16125#[inline]
16126#[target_feature(enable = "avx512fp16")]
16127#[cfg_attr(test, assert_instr(vcvtph2pd))]
16128#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16129pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
16130 unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
16131}
16132
16133/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16134/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16135/// corresponding mask bit is not set).
16136///
16137/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
16138#[inline]
16139#[target_feature(enable = "avx512fp16")]
16140#[cfg_attr(test, assert_instr(vcvtph2pd))]
16141#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16142pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
16143 _mm512_mask_cvtph_pd(src:_mm512_setzero_pd(), k, a)
16144}
16145
16146/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16147/// floating-point elements, and store the results in dst.
16148///
16149/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16150///
16151/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
16152#[inline]
16153#[target_feature(enable = "avx512fp16")]
16154#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
16155#[rustc_legacy_const_generics(1)]
16156#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16157pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
16158 static_assert_sae!(SAE);
16159 _mm512_mask_cvt_roundph_pd::<SAE>(src:_mm512_setzero_pd(), k:0xff, a)
16160}
16161
16162/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16163/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
16164/// dst when the corresponding mask bit is not set).
16165///
16166/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16167///
16168/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
16169#[inline]
16170#[target_feature(enable = "avx512fp16")]
16171#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
16172#[rustc_legacy_const_generics(3)]
16173#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16174pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
16175 src: __m512d,
16176 k: __mmask8,
16177 a: __m128h,
16178) -> __m512d {
16179 unsafe {
16180 static_assert_sae!(SAE);
16181 vcvtph2pd_512(a, src, k, SAE)
16182 }
16183}
16184
16185/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
16186/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
16187/// corresponding mask bit is not set).
16188///
16189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16190///
16191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
16192#[inline]
16193#[target_feature(enable = "avx512fp16")]
16194#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
16195#[rustc_legacy_const_generics(2)]
16196#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16197pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
16198 static_assert_sae!(SAE);
16199 _mm512_mask_cvt_roundph_pd::<SAE>(src:_mm512_setzero_pd(), k, a)
16200}
16201
16202/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16203/// floating-point element, store the result in the lower element of dst, and copy the upper element
16204/// from a to the upper element of dst.
16205///
16206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
16207#[inline]
16208#[target_feature(enable = "avx512fp16")]
16209#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16210#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16211pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
16212 _mm_mask_cvtsh_sd(src:a, k:0xff, a, b)
16213}
16214
16215/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16216/// floating-point element, store the result in the lower element of dst using writemask k (the element is
16217/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
16218/// of dst.
16219///
16220/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
16221#[inline]
16222#[target_feature(enable = "avx512fp16")]
16223#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16224#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16225pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16226 unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
16227}
16228
16229/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16230/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16231/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
16232///
16233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
16234#[inline]
16235#[target_feature(enable = "avx512fp16")]
16236#[cfg_attr(test, assert_instr(vcvtsh2sd))]
16237#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16238pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16239 _mm_mask_cvtsh_sd(src:_mm_set_sd(0.0), k, a, b)
16240}
16241
16242/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16243/// floating-point element, store the result in the lower element of dst, and copy the upper element from a
16244/// to the upper element of dst.
16245///
16246/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16247///
16248/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
16249#[inline]
16250#[target_feature(enable = "avx512fp16")]
16251#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
16252#[rustc_legacy_const_generics(2)]
16253#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16254pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
16255 static_assert_sae!(SAE);
16256 _mm_mask_cvt_roundsh_sd::<SAE>(src:a, k:0xff, a, b)
16257}
16258
16259/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16260/// floating-point element, store the result in the lower element of dst using writemask k (the element is
16261/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
16262/// of dst.
16263///
16264/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16265///
16266/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
16267#[inline]
16268#[target_feature(enable = "avx512fp16")]
16269#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
16270#[rustc_legacy_const_generics(4)]
16271#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16272pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
16273 src: __m128d,
16274 k: __mmask8,
16275 a: __m128d,
16276 b: __m128h,
16277) -> __m128d {
16278 unsafe {
16279 static_assert_sae!(SAE);
16280 vcvtsh2sd(a, b, src, k, SAE)
16281 }
16282}
16283
16284/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
16285/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
16286/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
16287///
16288/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16289///
16290/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
16291#[inline]
16292#[target_feature(enable = "avx512fp16")]
16293#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
16294#[rustc_legacy_const_generics(3)]
16295#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16296pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
16297 static_assert_sae!(SAE);
16298 _mm_mask_cvt_roundsh_sd::<SAE>(src:_mm_set_sd(0.0), k, a, b)
16299}
16300
16301/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16302///
16303/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
16304#[inline]
16305#[target_feature(enable = "avx512fp16")]
16306#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16307pub fn _mm_cvtsh_h(a: __m128h) -> f16 {
16308 unsafe { simd_extract!(a, 0) }
16309}
16310
16311/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16312///
16313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
16314#[inline]
16315#[target_feature(enable = "avx512fp16")]
16316#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16317pub fn _mm256_cvtsh_h(a: __m256h) -> f16 {
16318 unsafe { simd_extract!(a, 0) }
16319}
16320
16321/// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`.
16322///
16323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
16324#[inline]
16325#[target_feature(enable = "avx512fp16")]
16326#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16327pub fn _mm512_cvtsh_h(a: __m512h) -> f16 {
16328 unsafe { simd_extract!(a, 0) }
16329}
16330
16331/// Copy the lower 16-bit integer in a to dst.
16332///
16333/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
16334#[inline]
16335#[target_feature(enable = "avx512fp16")]
16336#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16337pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
16338 unsafe { simd_extract!(a.as_i16x8(), 0) }
16339}
16340
16341/// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst.
16342///
16343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
16344#[inline]
16345#[target_feature(enable = "avx512fp16")]
16346#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
16347pub fn _mm_cvtsi16_si128(a: i16) -> __m128i {
16348 unsafe { transmute(src:simd_insert!(i16x8::ZERO, 0, a)) }
16349}
16350
16351#[allow(improper_ctypes)]
16352unsafe extern "C" {
16353 #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
16354 unsafefn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
16355 #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
16356 unsafefn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
16357
16358 #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
16359 unsafefn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16360 #[link_name = "llvm.x86.avx512fp16.sub.ph.512"]
16361 unsafefn vsubph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16362 #[link_name = "llvm.x86.avx512fp16.mul.ph.512"]
16363 unsafefn vmulph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16364 #[link_name = "llvm.x86.avx512fp16.div.ph.512"]
16365 unsafefn vdivph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
16366
16367 #[link_name = "llvm.x86.avx512fp16.mask.add.sh.round"]
16368 unsafefn vaddsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16369 #[link_name = "llvm.x86.avx512fp16.mask.sub.sh.round"]
16370 unsafefn vsubsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16371 #[link_name = "llvm.x86.avx512fp16.mask.mul.sh.round"]
16372 unsafefn vmulsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16373 #[link_name = "llvm.x86.avx512fp16.mask.div.sh.round"]
16374 unsafefn vdivsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16375
16376 #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.128"]
16377 unsafefn vfmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
16378 #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.256"]
16379 unsafefn vfmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
16380 #[link_name = "llvm.x86.avx512fp16.mask.vfmul.cph.512"]
16381 unsafefn vfmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
16382 #[link_name = "llvm.x86.avx512fp16.mask.vfmul.csh"]
16383 unsafefn vfmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
16384
16385 #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.128"]
16386 unsafefn vfcmulcph_128(a: __m128, b: __m128, src: __m128, k: __mmask8) -> __m128;
16387 #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.256"]
16388 unsafefn vfcmulcph_256(a: __m256, b: __m256, src: __m256, k: __mmask8) -> __m256;
16389 #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.cph.512"]
16390 unsafefn vfcmulcph_512(a: __m512, b: __m512, src: __m512, k: __mmask16, rounding: i32) -> __m512;
16391 #[link_name = "llvm.x86.avx512fp16.mask.vfcmul.csh"]
16392 unsafefn vfcmulcsh(a: __m128, b: __m128, src: __m128, k: __mmask8, rounding: i32) -> __m128;
16393
16394 #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.128"]
16395 unsafefn vfmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16396 #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.128"]
16397 unsafefn vfmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16398 #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.256"]
16399 unsafefn vfmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16400 #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.256"]
16401 unsafefn vfmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16402 #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.cph.512"]
16403 unsafefn vfmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
16404 #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.cph.512"]
16405 unsafefn vfmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32) -> __m512;
16406 #[link_name = "llvm.x86.avx512fp16.mask.vfmadd.csh"]
16407 unsafefn vfmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16408 #[link_name = "llvm.x86.avx512fp16.maskz.vfmadd.csh"]
16409 unsafefn vfmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16410
16411 #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.128"]
16412 unsafefn vfcmaddcph_mask3_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16413 #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.128"]
16414 unsafefn vfcmaddcph_maskz_128(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128;
16415 #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.256"]
16416 unsafefn vfcmaddcph_mask3_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16417 #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.256"]
16418 unsafefn vfcmaddcph_maskz_256(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256;
16419 #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.cph.512"]
16420 unsafefn vfcmaddcph_mask3_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
16421 -> __m512;
16422 #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.cph.512"]
16423 unsafefn vfcmaddcph_maskz_512(a: __m512, b: __m512, c: __m512, k: __mmask16, rounding: i32)
16424 -> __m512;
16425 #[link_name = "llvm.x86.avx512fp16.mask.vfcmadd.csh"]
16426 unsafefn vfcmaddcsh_mask(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16427 #[link_name = "llvm.x86.avx512fp16.maskz.vfcmadd.csh"]
16428 unsafefn vfcmaddcsh_maskz(a: __m128, b: __m128, c: __m128, k: __mmask8, rounding: i32) -> __m128;
16429
16430 #[link_name = "llvm.x86.avx512fp16.vfmadd.ph.512"]
16431 unsafefn vfmaddph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
16432 #[link_name = "llvm.x86.avx512fp16.vfmadd.f16"]
16433 unsafefn vfmaddsh(a: f16, b: f16, c: f16, rounding: i32) -> f16;
16434
16435 #[link_name = "llvm.x86.avx512fp16.vfmaddsub.ph.512"]
16436 unsafefn vfmaddsubph_512(a: __m512h, b: __m512h, c: __m512h, rounding: i32) -> __m512h;
16437
16438 #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.128"]
16439 unsafefn vrcpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16440 #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.256"]
16441 unsafefn vrcpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16442 #[link_name = "llvm.x86.avx512fp16.mask.rcp.ph.512"]
16443 unsafefn vrcpph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
16444 #[link_name = "llvm.x86.avx512fp16.mask.rcp.sh"]
16445 unsafefn vrcpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16446
16447 #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.128"]
16448 unsafefn vrsqrtph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16449 #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.256"]
16450 unsafefn vrsqrtph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16451 #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.ph.512"]
16452 unsafefn vrsqrtph_512(a: __m512h, src: __m512h, k: __mmask32) -> __m512h;
16453 #[link_name = "llvm.x86.avx512fp16.mask.rsqrt.sh"]
16454 unsafefn vrsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16455
16456 #[link_name = "llvm.x86.avx512fp16.sqrt.ph.512"]
16457 unsafefn vsqrtph_512(a: __m512h, rounding: i32) -> __m512h;
16458 #[link_name = "llvm.x86.avx512fp16.mask.sqrt.sh"]
16459 unsafefn vsqrtsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16460
16461 #[link_name = "llvm.x86.avx512fp16.max.ph.128"]
16462 unsafefn vmaxph_128(a: __m128h, b: __m128h) -> __m128h;
16463 #[link_name = "llvm.x86.avx512fp16.max.ph.256"]
16464 unsafefn vmaxph_256(a: __m256h, b: __m256h) -> __m256h;
16465 #[link_name = "llvm.x86.avx512fp16.max.ph.512"]
16466 unsafefn vmaxph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
16467 #[link_name = "llvm.x86.avx512fp16.mask.max.sh.round"]
16468 unsafefn vmaxsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
16469
16470 #[link_name = "llvm.x86.avx512fp16.min.ph.128"]
16471 unsafefn vminph_128(a: __m128h, b: __m128h) -> __m128h;
16472 #[link_name = "llvm.x86.avx512fp16.min.ph.256"]
16473 unsafefn vminph_256(a: __m256h, b: __m256h) -> __m256h;
16474 #[link_name = "llvm.x86.avx512fp16.min.ph.512"]
16475 unsafefn vminph_512(a: __m512h, b: __m512h, sae: i32) -> __m512h;
16476 #[link_name = "llvm.x86.avx512fp16.mask.min.sh.round"]
16477 unsafefn vminsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
16478
16479 #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.128"]
16480 unsafefn vgetexpph_128(a: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16481 #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.256"]
16482 unsafefn vgetexpph_256(a: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16483 #[link_name = "llvm.x86.avx512fp16.mask.getexp.ph.512"]
16484 unsafefn vgetexpph_512(a: __m512h, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16485 #[link_name = "llvm.x86.avx512fp16.mask.getexp.sh"]
16486 unsafefn vgetexpsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, sae: i32) -> __m128h;
16487
16488 #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.128"]
16489 unsafefn vgetmantph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
16490 #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.256"]
16491 unsafefn vgetmantph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
16492 #[link_name = "llvm.x86.avx512fp16.mask.getmant.ph.512"]
16493 unsafefn vgetmantph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16494 #[link_name = "llvm.x86.avx512fp16.mask.getmant.sh"]
16495 unsafefn vgetmantsh(
16496 a: __m128h,
16497 b: __m128h,
16498 imm8: i32,
16499 src: __m128h,
16500 k: __mmask8,
16501 sae: i32,
16502 ) -> __m128h;
16503
16504 #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.128"]
16505 unsafefn vrndscaleph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
16506 #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.256"]
16507 unsafefn vrndscaleph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
16508 #[link_name = "llvm.x86.avx512fp16.mask.rndscale.ph.512"]
16509 unsafefn vrndscaleph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16510 #[link_name = "llvm.x86.avx512fp16.mask.rndscale.sh"]
16511 unsafefn vrndscalesh(
16512 a: __m128h,
16513 b: __m128h,
16514 src: __m128h,
16515 k: __mmask8,
16516 imm8: i32,
16517 sae: i32,
16518 ) -> __m128h;
16519
16520 #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.128"]
16521 unsafefn vscalefph_128(a: __m128h, b: __m128h, src: __m128h, k: __mmask8) -> __m128h;
16522 #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.256"]
16523 unsafefn vscalefph_256(a: __m256h, b: __m256h, src: __m256h, k: __mmask16) -> __m256h;
16524 #[link_name = "llvm.x86.avx512fp16.mask.scalef.ph.512"]
16525 unsafefn vscalefph_512(a: __m512h, b: __m512h, src: __m512h, k: __mmask32, rounding: i32) -> __m512h;
16526 #[link_name = "llvm.x86.avx512fp16.mask.scalef.sh"]
16527 unsafefn vscalefsh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16528
16529 #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.128"]
16530 unsafefn vreduceph_128(a: __m128h, imm8: i32, src: __m128h, k: __mmask8) -> __m128h;
16531 #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.256"]
16532 unsafefn vreduceph_256(a: __m256h, imm8: i32, src: __m256h, k: __mmask16) -> __m256h;
16533 #[link_name = "llvm.x86.avx512fp16.mask.reduce.ph.512"]
16534 unsafefn vreduceph_512(a: __m512h, imm8: i32, src: __m512h, k: __mmask32, sae: i32) -> __m512h;
16535 #[link_name = "llvm.x86.avx512fp16.mask.reduce.sh"]
16536 unsafefn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
16537 -> __m128h;
16538
16539 #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
16540 unsafefn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
16541
16542 #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i16"]
16543 unsafefn vcvtw2ph_128(a: i16x8, rounding: i32) -> __m128h;
16544 #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i16"]
16545 unsafefn vcvtw2ph_256(a: i16x16, rounding: i32) -> __m256h;
16546 #[link_name = "llvm.x86.avx512.sitofp.round.v32f16.v32i16"]
16547 unsafefn vcvtw2ph_512(a: i16x32, rounding: i32) -> __m512h;
16548 #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i16"]
16549 unsafefn vcvtuw2ph_128(a: u16x8, rounding: i32) -> __m128h;
16550 #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i16"]
16551 unsafefn vcvtuw2ph_256(a: u16x16, rounding: i32) -> __m256h;
16552 #[link_name = "llvm.x86.avx512.uitofp.round.v32f16.v32i16"]
16553 unsafefn vcvtuw2ph_512(a: u16x32, rounding: i32) -> __m512h;
16554
16555 #[link_name = "llvm.x86.avx512fp16.mask.vcvtdq2ph.128"]
16556 unsafefn vcvtdq2ph_128(a: i32x4, src: __m128h, k: __mmask8) -> __m128h;
16557 #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i32"]
16558 unsafefn vcvtdq2ph_256(a: i32x8, rounding: i32) -> __m128h;
16559 #[link_name = "llvm.x86.avx512.sitofp.round.v16f16.v16i32"]
16560 unsafefn vcvtdq2ph_512(a: i32x16, rounding: i32) -> __m256h;
16561 #[link_name = "llvm.x86.avx512fp16.vcvtsi2sh"]
16562 unsafefn vcvtsi2sh(a: __m128h, b: i32, rounding: i32) -> __m128h;
16563 #[link_name = "llvm.x86.avx512fp16.mask.vcvtudq2ph.128"]
16564 unsafefn vcvtudq2ph_128(a: u32x4, src: __m128h, k: __mmask8) -> __m128h;
16565 #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i32"]
16566 unsafefn vcvtudq2ph_256(a: u32x8, rounding: i32) -> __m128h;
16567 #[link_name = "llvm.x86.avx512.uitofp.round.v16f16.v16i32"]
16568 unsafefn vcvtudq2ph_512(a: u32x16, rounding: i32) -> __m256h;
16569 #[link_name = "llvm.x86.avx512fp16.vcvtusi2sh"]
16570 unsafefn vcvtusi2sh(a: __m128h, b: u32, rounding: i32) -> __m128h;
16571
16572 #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.128"]
16573 unsafefn vcvtqq2ph_128(a: i64x2, src: __m128h, k: __mmask8) -> __m128h;
16574 #[link_name = "llvm.x86.avx512fp16.mask.vcvtqq2ph.256"]
16575 unsafefn vcvtqq2ph_256(a: i64x4, src: __m128h, k: __mmask8) -> __m128h;
16576 #[link_name = "llvm.x86.avx512.sitofp.round.v8f16.v8i64"]
16577 unsafefn vcvtqq2ph_512(a: i64x8, rounding: i32) -> __m128h;
16578 #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.128"]
16579 unsafefn vcvtuqq2ph_128(a: u64x2, src: __m128h, k: __mmask8) -> __m128h;
16580 #[link_name = "llvm.x86.avx512fp16.mask.vcvtuqq2ph.256"]
16581 unsafefn vcvtuqq2ph_256(a: u64x4, src: __m128h, k: __mmask8) -> __m128h;
16582 #[link_name = "llvm.x86.avx512.uitofp.round.v8f16.v8i64"]
16583 unsafefn vcvtuqq2ph_512(a: u64x8, rounding: i32) -> __m128h;
16584
16585 #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.128"]
16586 unsafefn vcvtps2phx_128(a: __m128, src: __m128h, k: __mmask8) -> __m128h;
16587 #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.256"]
16588 unsafefn vcvtps2phx_256(a: __m256, src: __m128h, k: __mmask8) -> __m128h;
16589 #[link_name = "llvm.x86.avx512fp16.mask.vcvtps2phx.512"]
16590 unsafefn vcvtps2phx_512(a: __m512, src: __m256h, k: __mmask16, rounding: i32) -> __m256h;
16591 #[link_name = "llvm.x86.avx512fp16.mask.vcvtss2sh.round"]
16592 unsafefn vcvtss2sh(a: __m128h, b: __m128, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16593
16594 #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.128"]
16595 unsafefn vcvtpd2ph_128(a: __m128d, src: __m128h, k: __mmask8) -> __m128h;
16596 #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.256"]
16597 unsafefn vcvtpd2ph_256(a: __m256d, src: __m128h, k: __mmask8) -> __m128h;
16598 #[link_name = "llvm.x86.avx512fp16.mask.vcvtpd2ph.512"]
16599 unsafefn vcvtpd2ph_512(a: __m512d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16600 #[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
16601 unsafefn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
16602
16603 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"]
16604 unsafefn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
16605 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"]
16606 unsafefn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
16607 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"]
16608 unsafefn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32;
16609 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"]
16610 unsafefn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
16611 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
16612 unsafefn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
16613 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
16614 unsafefn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
16615
16616 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
16617 unsafefn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
16618 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"]
16619 unsafefn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
16620 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"]
16621 unsafefn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32;
16622 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"]
16623 unsafefn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
16624 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"]
16625 unsafefn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
16626 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"]
16627 unsafefn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
16628
16629 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"]
16630 unsafefn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
16631 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"]
16632 unsafefn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
16633 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"]
16634 unsafefn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16;
16635 #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"]
16636 unsafefn vcvtsh2si32(a: __m128h, rounding: i32) -> i32;
16637 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"]
16638 unsafefn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
16639 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"]
16640 unsafefn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
16641 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"]
16642 unsafefn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16;
16643 #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"]
16644 unsafefn vcvtsh2usi32(a: __m128h, sae: i32) -> u32;
16645
16646 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"]
16647 unsafefn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
16648 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"]
16649 unsafefn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
16650 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"]
16651 unsafefn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16;
16652 #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"]
16653 unsafefn vcvttsh2si32(a: __m128h, sae: i32) -> i32;
16654 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"]
16655 unsafefn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
16656 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"]
16657 unsafefn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
16658 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"]
16659 unsafefn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16;
16660 #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"]
16661 unsafefn vcvttsh2usi32(a: __m128h, sae: i32) -> u32;
16662
16663 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"]
16664 unsafefn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
16665 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"]
16666 unsafefn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
16667 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"]
16668 unsafefn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
16669 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"]
16670 unsafefn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
16671 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"]
16672 unsafefn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
16673 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"]
16674 unsafefn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
16675
16676 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"]
16677 unsafefn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
16678 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"]
16679 unsafefn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
16680 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"]
16681 unsafefn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
16682 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"]
16683 unsafefn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
16684 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256"]
16685 unsafefn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
16686 #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"]
16687 unsafefn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
16688
16689 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"]
16690 unsafefn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128;
16691 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"]
16692 unsafefn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256;
16693 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"]
16694 unsafefn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512;
16695 #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"]
16696 unsafefn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128;
16697
16698 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"]
16699 unsafefn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d;
16700 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"]
16701 unsafefn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d;
16702 #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"]
16703 unsafefn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d;
16704 #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"]
16705 unsafefn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d;
16706
16707}
16708
16709#[cfg(test)]
16710mod tests {
16711 use crate::core_arch::x86::*;
16712 use crate::mem::transmute;
16713 use crate::ptr::{addr_of, addr_of_mut};
16714 use stdarch_test::simd_test;
16715
    #[target_feature(enable = "avx512fp16")]
    unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
        // Test helper: splat the complex value (re, im) across all four
        // complex (re/im f16 pair) slots of a 128-bit vector.
        _mm_setr_ph(re, im, re, im, re, im, re, im)
    }
16720
    #[target_feature(enable = "avx512fp16")]
    unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
        // Test helper: splat the complex value (re, im) across all eight
        // complex (re/im f16 pair) slots of a 256-bit vector.
        _mm256_setr_ph(
            re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
        )
    }
16727
    #[target_feature(enable = "avx512fp16")]
    unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
        // Test helper: splat the complex value (re, im) across all sixteen
        // complex (re/im f16 pair) slots of a 512-bit vector.
        _mm512_setr_ph(
            re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
            re, im, re, im, re, im, re, im, re, im,
        )
    }
16735
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_set_ph() {
        // `set` takes arguments highest-lane-first, so it must equal `setr`
        // (lowest-lane-first) with the argument list reversed.
        let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
        assert_eq_m128h(r, e);
    }
16742
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_set_ph() {
        // `set` (highest-lane-first) must equal `setr` with reversed arguments.
        let r = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let e = _mm256_setr_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }
16753
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_set_ph() {
        // `set` (highest-lane-first) must equal `setr` with reversed arguments.
        let r = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let e = _mm512_setr_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
16768
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_set_sh() {
        // `set_sh` places the scalar in lane 0 and zeroes all other lanes.
        let r = _mm_set_sh(1.0);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
        assert_eq_m128h(r, e);
    }
16775
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_set1_ph() {
        // `set1` broadcasts the value to all 8 lanes.
        let r = _mm_set1_ph(1.0);
        let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
        assert_eq_m128h(r, e);
    }
16782
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_set1_ph() {
        // `set1` broadcasts the value to all 16 lanes.
        let r = _mm256_set1_ph(1.0);
        let e = _mm256_set_ph(
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }
16791
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_set1_ph() {
        // `set1` broadcasts the value to all 32 lanes.
        let r = _mm512_set1_ph(1.0);
        let e = _mm512_set_ph(
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
16801
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_setr_ph() {
        // `setr` (lowest-lane-first) must equal `set` with reversed arguments.
        let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
        assert_eq_m128h(r, e);
    }
16808
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_setr_ph() {
        // `setr` (lowest-lane-first) must equal `set` with reversed arguments.
        let r = _mm256_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let e = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }
16819
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_setr_ph() {
        // `setr` (lowest-lane-first) must equal `set` with reversed arguments.
        let r = _mm512_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let e = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
16834
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_setzero_ph() {
        // All-zero vector must equal a broadcast of 0.0.
        let r = _mm_setzero_ph();
        let e = _mm_set1_ph(0.0);
        assert_eq_m128h(r, e);
    }
16841
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_setzero_ph() {
        // All-zero vector must equal a broadcast of 0.0.
        let r = _mm256_setzero_ph();
        let e = _mm256_set1_ph(0.0);
        assert_eq_m256h(r, e);
    }
16848
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_setzero_ph() {
        // All-zero vector must equal a broadcast of 0.0.
        let r = _mm512_setzero_ph();
        let e = _mm512_set1_ph(0.0);
        assert_eq_m512h(r, e);
    }
16855
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_castsi128_ph() {
        // 0x3c00 is the IEEE binary16 bit pattern for 1.0, so a bitwise cast
        // of the integer vector must compare equal to set1_ph(1.0).
        let a = _mm_set1_epi16(0x3c00);
        let r = _mm_castsi128_ph(a);
        let e = _mm_set1_ph(1.0);
        assert_eq_m128h(r, e);
    }
16863
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_castsi256_ph() {
        // 0x3c00 is the binary16 bit pattern for 1.0.
        let a = _mm256_set1_epi16(0x3c00);
        let r = _mm256_castsi256_ph(a);
        let e = _mm256_set1_ph(1.0);
        assert_eq_m256h(r, e);
    }
16871
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_castsi512_ph() {
        // 0x3c00 is the binary16 bit pattern for 1.0.
        let a = _mm512_set1_epi16(0x3c00);
        let r = _mm512_castsi512_ph(a);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }
16879
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_castph_si128() {
        // Reverse direction: f16 1.0 reinterpreted as integer is 0x3c00.
        let a = _mm_set1_ph(1.0);
        let r = _mm_castph_si128(a);
        let e = _mm_set1_epi16(0x3c00);
        assert_eq_m128i(r, e);
    }
16887
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm256_castph_si256() {
        // Reverse direction: f16 1.0 reinterpreted as integer is 0x3c00.
        let a = _mm256_set1_ph(1.0);
        let r = _mm256_castph_si256(a);
        let e = _mm256_set1_epi16(0x3c00);
        assert_eq_m256i(r, e);
    }
16895
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_castph_si512() {
        // Reverse direction: f16 1.0 reinterpreted as integer is 0x3c00.
        let a = _mm512_set1_ph(1.0);
        let r = _mm512_castph_si512(a);
        let e = _mm512_set1_epi16(0x3c00);
        assert_eq_m512i(r, e);
    }
16903
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_castps_ph() {
        // Build an f32 vector whose raw 16-bit halves are 0x3c00 (f16 1.0);
        // a bitwise cast to ph must then read every lane as 1.0.
        let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
        let r = _mm_castps_ph(a);
        let e = _mm_set1_ph(1.0);
        assert_eq_m128h(r, e);
    }
16911
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_castps_ph() {
        // Raw 16-bit halves of the f32 vector are 0x3c00 (f16 1.0).
        let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
        let r = _mm256_castps_ph(a);
        let e = _mm256_set1_ph(1.0);
        assert_eq_m256h(r, e);
    }
16919
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_castps_ph() {
        // Raw 16-bit halves of the f32 vector are 0x3c00 (f16 1.0).
        let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
        let r = _mm512_castps_ph(a);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }
16927
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_castph_ps() {
        // 0x3f800000 is the IEEE binary32 bit pattern for 1.0f32, so casting
        // the ph view back to ps must yield all-1.0 f32 lanes.
        let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
        let r = _mm_castph_ps(a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }
16935
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm256_castph_ps() {
        // 0x3f800000 is the binary32 bit pattern for 1.0f32.
        let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
        let r = _mm256_castph_ps(a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }
16943
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_castph_ps() {
        // 0x3f800000 is the binary32 bit pattern for 1.0f32.
        let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
        let r = _mm512_castph_ps(a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
16951
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_castpd_ph() {
        // Raw 16-bit chunks of the f64 vector are 0x3c00 (f16 1.0).
        let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
        let r = _mm_castpd_ph(a);
        let e = _mm_set1_ph(1.0);
        assert_eq_m128h(r, e);
    }
16959
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_castpd_ph() {
        // Raw 16-bit chunks of the f64 vector are 0x3c00 (f16 1.0).
        let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
        let r = _mm256_castpd_ph(a);
        let e = _mm256_set1_ph(1.0);
        assert_eq_m256h(r, e);
    }
16967
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_castpd_ph() {
        // Raw 16-bit chunks of the f64 vector are 0x3c00 (f16 1.0).
        let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
        let r = _mm512_castpd_ph(a);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }
16975
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_castph_pd() {
        // 0x3ff0000000000000 is the IEEE binary64 bit pattern for 1.0f64.
        let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
        let r = _mm_castph_pd(a);
        let e = _mm_set1_pd(1.0);
        assert_eq_m128d(r, e);
    }
16983
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm256_castph_pd() {
        // 0x3ff0000000000000 is the binary64 bit pattern for 1.0f64.
        let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
        let r = _mm256_castph_pd(a);
        let e = _mm256_set1_pd(1.0);
        assert_eq_m256d(r, e);
    }
16991
16992 #[simd_test(enable = "avx512fp16")]
16993 unsafe fn test_mm512_castph_pd() {
16994 let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
16995 let r = _mm512_castph_pd(a);
16996 let e = _mm512_set1_pd(1.0);
16997 assert_eq_m512d(r, e);
16998 }
16999
    // Narrowing cast 256 -> 128 keeps the low eight f16 lanes, dropping the high eight.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_castph256_ph128() {
        let a = _mm256_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm256_castph256_ph128(a);
        let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m128h(r, e);
    }
17009
    // Narrowing cast 512 -> 128 keeps only the lowest eight f16 lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_castph512_ph128() {
        let a = _mm512_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
            20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_castph512_ph128(a);
        let e = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m128h(r, e);
    }
17020
    // Narrowing cast 512 -> 256 keeps only the lowest sixteen f16 lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_castph512_ph256() {
        let a = _mm512_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
            20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_castph512_ph256(a);
        let e = _mm256_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m256h(r, e);
    }
17033
17034 #[simd_test(enable = "avx512fp16,avx512vl")]
17035 unsafe fn test_mm256_castph128_ph256() {
17036 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17037 let r = _mm256_castph128_ph256(a);
17038 assert_eq_m128h(_mm256_castph256_ph128(r), a);
17039 }
17040
17041 #[simd_test(enable = "avx512fp16,avx512vl")]
17042 unsafe fn test_mm512_castph128_ph512() {
17043 let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
17044 let r = _mm512_castph128_ph512(a);
17045 assert_eq_m128h(_mm512_castph512_ph128(r), a);
17046 }
17047
    // Widening cast 256 -> 512 leaves the upper lanes undefined; only the
    // round-trip through the low 256 bits is checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_castph256_ph512() {
        let a = _mm256_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_castph256_ph512(a);
        assert_eq_m256h(_mm512_castph512_ph256(r), a);
    }
17056
    // Zero-extending cast 128 -> 256: upper eight lanes must be exactly 0.0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_zextph128_ph256() {
        let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_zextph128_ph256(a);
        let e = _mm256_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m256h(r, e);
    }
17066
    // Zero-extending cast 128 -> 512: upper twenty-four lanes must be exactly 0.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_zextph128_ph512() {
        let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_zextph128_ph512(a);
        let e = _mm512_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512h(r, e);
    }
17077
    // Zero-extending cast 256 -> 512: upper sixteen lanes must be exactly 0.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_zextph256_ph512() {
        let a = _mm256_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_zextph256_ph512(a);
        let e = _mm512_setr_ph(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 0., 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512h(r, e);
    }
17090
17091 #[simd_test(enable = "avx512fp16,avx512vl")]
17092 unsafe fn test_mm_cmp_ph_mask() {
17093 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17094 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17095 let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
17096 assert_eq!(r, 0b11110000);
17097 }
17098
17099 #[simd_test(enable = "avx512fp16,avx512vl")]
17100 unsafe fn test_mm_mask_cmp_ph_mask() {
17101 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17102 let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
17103 let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
17104 assert_eq!(r, 0b01010000);
17105 }
17106
    // Ordered-equal compare over 16 lanes; every second group of four lanes differs in sign.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cmp_ph_mask() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
            -16.0,
        );
        let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
        assert_eq!(r, 0b1111000011110000);
    }
17119
    // Masked variant: equal-lanes mask 0b1111000011110000 ANDed with the write mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cmp_ph_mask() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
            -16.0,
        );
        let r = _mm256_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b0101010101010101, a, b);
        assert_eq!(r, 0b0101000001010000);
    }
17132
    // Ordered-equal compare over 32 lanes; alternating groups of four lanes differ in sign.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cmp_ph_mask() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
            -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
            -29.0, -30.0, -31.0, -32.0,
        );
        let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
        assert_eq!(r, 0b11110000111100001111000011110000);
    }
17148
    // Masked variant: equal-lanes mask ANDed with the 32-bit write mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cmp_ph_mask() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
            -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
            -29.0, -30.0, -31.0, -32.0,
        );
        let r = _mm512_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101010101010101010101010101, a, b);
        assert_eq!(r, 0b01010000010100000101000001010000);
    }
17164
    // Same as test_mm512_cmp_ph_mask but through the SAE (suppress-all-exceptions) variant.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cmp_round_ph_mask() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
            -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
            -29.0, -30.0, -31.0, -32.0,
        );
        let r = _mm512_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
        assert_eq!(r, 0b11110000111100001111000011110000);
    }
17180
    // Masked SAE compare: equal-lanes mask ANDed with the 32-bit write mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cmp_round_ph_mask() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0,
            -16.0, 17.0, 18.0, 19.0, 20.0, -21.0, -22.0, -23.0, -24.0, 25.0, 26.0, 27.0, 28.0,
            -29.0, -30.0, -31.0, -32.0,
        );
        let r = _mm512_mask_cmp_round_ph_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        assert_eq!(r, 0b01010000010100000101000001010000);
    }
17200
17201 #[simd_test(enable = "avx512fp16")]
17202 unsafe fn test_mm_cmp_round_sh_mask() {
17203 let a = _mm_set_sh(1.0);
17204 let b = _mm_set_sh(1.0);
17205 let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17206 assert_eq!(r, 1);
17207 }
17208
17209 #[simd_test(enable = "avx512fp16")]
17210 unsafe fn test_mm_mask_cmp_round_sh_mask() {
17211 let a = _mm_set_sh(1.0);
17212 let b = _mm_set_sh(1.0);
17213 let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
17214 assert_eq!(r, 0);
17215 }
17216
17217 #[simd_test(enable = "avx512fp16")]
17218 unsafe fn test_mm_cmp_sh_mask() {
17219 let a = _mm_set_sh(1.0);
17220 let b = _mm_set_sh(1.0);
17221 let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
17222 assert_eq!(r, 1);
17223 }
17224
17225 #[simd_test(enable = "avx512fp16")]
17226 unsafe fn test_mm_mask_cmp_sh_mask() {
17227 let a = _mm_set_sh(1.0);
17228 let b = _mm_set_sh(1.0);
17229 let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
17230 assert_eq!(r, 0);
17231 }
17232
17233 #[simd_test(enable = "avx512fp16")]
17234 unsafe fn test_mm_comi_round_sh() {
17235 let a = _mm_set_sh(1.0);
17236 let b = _mm_set_sh(1.0);
17237 let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
17238 assert_eq!(r, 1);
17239 }
17240
17241 #[simd_test(enable = "avx512fp16")]
17242 unsafe fn test_mm_comi_sh() {
17243 let a = _mm_set_sh(1.0);
17244 let b = _mm_set_sh(1.0);
17245 let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
17246 assert_eq!(r, 1);
17247 }
17248
17249 #[simd_test(enable = "avx512fp16")]
17250 unsafe fn test_mm_comieq_sh() {
17251 let a = _mm_set_sh(1.0);
17252 let b = _mm_set_sh(1.0);
17253 let r = _mm_comieq_sh(a, b);
17254 assert_eq!(r, 1);
17255 }
17256
17257 #[simd_test(enable = "avx512fp16")]
17258 unsafe fn test_mm_comige_sh() {
17259 let a = _mm_set_sh(2.0);
17260 let b = _mm_set_sh(1.0);
17261 let r = _mm_comige_sh(a, b);
17262 assert_eq!(r, 1);
17263 }
17264
17265 #[simd_test(enable = "avx512fp16")]
17266 unsafe fn test_mm_comigt_sh() {
17267 let a = _mm_set_sh(2.0);
17268 let b = _mm_set_sh(1.0);
17269 let r = _mm_comigt_sh(a, b);
17270 assert_eq!(r, 1);
17271 }
17272
17273 #[simd_test(enable = "avx512fp16")]
17274 unsafe fn test_mm_comile_sh() {
17275 let a = _mm_set_sh(1.0);
17276 let b = _mm_set_sh(2.0);
17277 let r = _mm_comile_sh(a, b);
17278 assert_eq!(r, 1);
17279 }
17280
17281 #[simd_test(enable = "avx512fp16")]
17282 unsafe fn test_mm_comilt_sh() {
17283 let a = _mm_set_sh(1.0);
17284 let b = _mm_set_sh(2.0);
17285 let r = _mm_comilt_sh(a, b);
17286 assert_eq!(r, 1);
17287 }
17288
17289 #[simd_test(enable = "avx512fp16")]
17290 unsafe fn test_mm_comineq_sh() {
17291 let a = _mm_set_sh(1.0);
17292 let b = _mm_set_sh(2.0);
17293 let r = _mm_comineq_sh(a, b);
17294 assert_eq!(r, 1);
17295 }
17296
17297 #[simd_test(enable = "avx512fp16")]
17298 unsafe fn test_mm_ucomieq_sh() {
17299 let a = _mm_set_sh(1.0);
17300 let b = _mm_set_sh(1.0);
17301 let r = _mm_ucomieq_sh(a, b);
17302 assert_eq!(r, 1);
17303 }
17304
17305 #[simd_test(enable = "avx512fp16")]
17306 unsafe fn test_mm_ucomige_sh() {
17307 let a = _mm_set_sh(2.0);
17308 let b = _mm_set_sh(1.0);
17309 let r = _mm_ucomige_sh(a, b);
17310 assert_eq!(r, 1);
17311 }
17312
17313 #[simd_test(enable = "avx512fp16")]
17314 unsafe fn test_mm_ucomigt_sh() {
17315 let a = _mm_set_sh(2.0);
17316 let b = _mm_set_sh(1.0);
17317 let r = _mm_ucomigt_sh(a, b);
17318 assert_eq!(r, 1);
17319 }
17320
17321 #[simd_test(enable = "avx512fp16")]
17322 unsafe fn test_mm_ucomile_sh() {
17323 let a = _mm_set_sh(1.0);
17324 let b = _mm_set_sh(2.0);
17325 let r = _mm_ucomile_sh(a, b);
17326 assert_eq!(r, 1);
17327 }
17328
17329 #[simd_test(enable = "avx512fp16")]
17330 unsafe fn test_mm_ucomilt_sh() {
17331 let a = _mm_set_sh(1.0);
17332 let b = _mm_set_sh(2.0);
17333 let r = _mm_ucomilt_sh(a, b);
17334 assert_eq!(r, 1);
17335 }
17336
17337 #[simd_test(enable = "avx512fp16")]
17338 unsafe fn test_mm_ucomineq_sh() {
17339 let a = _mm_set_sh(1.0);
17340 let b = _mm_set_sh(2.0);
17341 let r = _mm_ucomineq_sh(a, b);
17342 assert_eq!(r, 1);
17343 }
17344
17345 #[simd_test(enable = "avx512fp16,avx512vl")]
17346 unsafe fn test_mm_load_ph() {
17347 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17348 let b = _mm_load_ph(addr_of!(a).cast());
17349 assert_eq_m128h(a, b);
17350 }
17351
    // Aligned 256-bit load from the address of a stack vector must reproduce it.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_load_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_load_ph(addr_of!(a).cast());
        assert_eq_m256h(a, b);
    }
17360
    // Aligned 512-bit load from the address of a stack vector must reproduce it.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_load_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_load_ph(addr_of!(a).cast());
        assert_eq_m512h(a, b);
    }
17371
17372 #[simd_test(enable = "avx512fp16,avx512vl")]
17373 unsafe fn test_mm_load_sh() {
17374 let a = _mm_set_sh(1.0);
17375 let b = _mm_load_sh(addr_of!(a).cast());
17376 assert_eq_m128h(a, b);
17377 }
17378
17379 #[simd_test(enable = "avx512fp16,avx512vl")]
17380 unsafe fn test_mm_mask_load_sh() {
17381 let a = _mm_set_sh(1.0);
17382 let src = _mm_set_sh(2.);
17383 let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
17384 assert_eq_m128h(a, b);
17385 let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
17386 assert_eq_m128h(src, b);
17387 }
17388
17389 #[simd_test(enable = "avx512fp16,avx512vl")]
17390 unsafe fn test_mm_maskz_load_sh() {
17391 let a = _mm_set_sh(1.0);
17392 let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
17393 assert_eq_m128h(a, b);
17394 let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
17395 assert_eq_m128h(_mm_setzero_ph(), b);
17396 }
17397
17398 #[simd_test(enable = "avx512fp16,avx512vl")]
17399 unsafe fn test_mm_loadu_ph() {
17400 let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
17401 let r = _mm_loadu_ph(array.as_ptr());
17402 let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17403 assert_eq_m128h(r, e);
17404 }
17405
    // Unaligned 256-bit load; first array element lands in lane 0 (setr order).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_loadu_ph() {
        let array = [
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ];
        let r = _mm256_loadu_ph(array.as_ptr());
        let e = _mm256_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }
17417
    // Unaligned 512-bit load; first array element lands in lane 0 (setr order).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_loadu_ph() {
        let array = [
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        ];
        let r = _mm512_loadu_ph(array.as_ptr());
        let e = _mm512_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }
17433
17434 #[simd_test(enable = "avx512fp16,avx512vl")]
17435 unsafe fn test_mm_move_sh() {
17436 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17437 let b = _mm_set_sh(9.0);
17438 let r = _mm_move_sh(a, b);
17439 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0);
17440 assert_eq_m128h(r, e);
17441 }
17442
17443 #[simd_test(enable = "avx512fp16,avx512vl")]
17444 unsafe fn test_mm_mask_move_sh() {
17445 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17446 let b = _mm_set_sh(9.0);
17447 let src = _mm_set_sh(10.0);
17448 let r = _mm_mask_move_sh(src, 0, a, b);
17449 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0);
17450 assert_eq_m128h(r, e);
17451 }
17452
17453 #[simd_test(enable = "avx512fp16,avx512vl")]
17454 unsafe fn test_mm_maskz_move_sh() {
17455 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17456 let b = _mm_set_sh(9.0);
17457 let r = _mm_maskz_move_sh(0, a, b);
17458 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0);
17459 assert_eq_m128h(r, e);
17460 }
17461
17462 #[simd_test(enable = "avx512fp16,avx512vl")]
17463 unsafe fn test_mm_store_ph() {
17464 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17465 let mut b = _mm_setzero_ph();
17466 _mm_store_ph(addr_of_mut!(b).cast(), a);
17467 assert_eq_m128h(a, b);
17468 }
17469
    // Aligned 256-bit store into another vector's storage must round-trip the value.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_store_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let mut b = _mm256_setzero_ph();
        _mm256_store_ph(addr_of_mut!(b).cast(), a);
        assert_eq_m256h(a, b);
    }
17479
    // Aligned 512-bit store into another vector's storage must round-trip the value.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_store_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let mut b = _mm512_setzero_ph();
        _mm512_store_ph(addr_of_mut!(b).cast(), a);
        assert_eq_m512h(a, b);
    }
17491
17492 #[simd_test(enable = "avx512fp16,avx512vl")]
17493 unsafe fn test_mm_store_sh() {
17494 let a = _mm_set_sh(1.0);
17495 let mut b = _mm_setzero_ph();
17496 _mm_store_sh(addr_of_mut!(b).cast(), a);
17497 assert_eq_m128h(a, b);
17498 }
17499
17500 #[simd_test(enable = "avx512fp16,avx512vl")]
17501 unsafe fn test_mm_mask_store_sh() {
17502 let a = _mm_set_sh(1.0);
17503 let mut b = _mm_setzero_ph();
17504 _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
17505 assert_eq_m128h(_mm_setzero_ph(), b);
17506 _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
17507 assert_eq_m128h(a, b);
17508 }
17509
17510 #[simd_test(enable = "avx512fp16,avx512vl")]
17511 unsafe fn test_mm_storeu_ph() {
17512 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17513 let mut array = [0.0; 8];
17514 _mm_storeu_ph(array.as_mut_ptr(), a);
17515 assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
17516 }
17517
    // Unaligned 256-bit store/load round-trip through an f16 array.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_storeu_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let mut array = [0.0; 16];
        _mm256_storeu_ph(array.as_mut_ptr(), a);
        assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
    }
17527
    // Unaligned 512-bit store/load round-trip through an f16 array.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_storeu_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let mut array = [0.0; 32];
        _mm512_storeu_ph(array.as_mut_ptr(), a);
        assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
    }
17539
17540 #[simd_test(enable = "avx512fp16,avx512vl")]
17541 unsafe fn test_mm_add_ph() {
17542 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17543 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17544 let r = _mm_add_ph(a, b);
17545 let e = _mm_set1_ph(9.0);
17546 assert_eq_m128h(r, e);
17547 }
17548
17549 #[simd_test(enable = "avx512fp16,avx512vl")]
17550 unsafe fn test_mm_mask_add_ph() {
17551 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17552 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17553 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17554 let r = _mm_mask_add_ph(src, 0b01010101, a, b);
17555 let e = _mm_set_ph(10., 9., 12., 9., 14., 9., 16., 9.);
17556 assert_eq_m128h(r, e);
17557 }
17558
17559 #[simd_test(enable = "avx512fp16,avx512vl")]
17560 unsafe fn test_mm_maskz_add_ph() {
17561 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17562 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17563 let r = _mm_maskz_add_ph(0b01010101, a, b);
17564 let e = _mm_set_ph(0., 9., 0., 9., 0., 9., 0., 9.);
17565 assert_eq_m128h(r, e);
17566 }
17567
    // Lane-wise add of (1..16) and its reverse: every lane sums to 17.0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_add_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let r = _mm256_add_ph(a, b);
        let e = _mm256_set1_ph(17.0);
        assert_eq_m256h(r, e);
    }
17580
    // Masked add: set mask bits take the sum (17.0), clear bits keep src lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_add_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let src = _mm256_set_ph(
            18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
        );
        let r = _mm256_mask_add_ph(src, 0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            18., 17., 20., 17., 22., 17., 24., 17., 26., 17., 28., 17., 30., 17., 32., 17.,
        );
        assert_eq_m256h(r, e);
    }
17598
    // Zero-masked add: set mask bits take the sum (17.0), clear bits are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_add_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let r = _mm256_maskz_add_ph(0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17., 0., 17.,
        );
        assert_eq_m256h(r, e);
    }
17613
    // Lane-wise add of (1..32) and its reverse: every lane sums to 33.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_add_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_add_ph(a, b);
        let e = _mm512_set1_ph(33.0);
        assert_eq_m512h(r, e);
    }
17630
    // Masked add: set mask bits take the sum (33.0), clear bits keep src lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_add_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let src = _mm512_set_ph(
            34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
            51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
        );
        let r = _mm512_mask_add_ph(src, 0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
            33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
        );
        assert_eq_m512h(r, e);
    }
17654
    // Zero-masked add: set mask bits take the sum (33.0), clear bits are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_add_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_maskz_add_ph(0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
            33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
        );
        assert_eq_m512h(r, e);
    }
17674
    // Add with explicit round-to-nearest + no-exceptions; every lane sums to 33.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_add_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ph(33.0);
        assert_eq_m512h(r, e);
    }
17691
    // Masked rounding add: set mask bits take the sum (33.0), clear bits keep src lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_add_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let src = _mm512_set_ph(
            34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
            51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
        );
        let r = _mm512_mask_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            34., 33., 36., 33., 38., 33., 40., 33., 42., 33., 44., 33., 46., 33., 48., 33., 50.,
            33., 52., 33., 54., 33., 56., 33., 58., 33., 60., 33., 62., 33., 64., 33.,
        );
        assert_eq_m512h(r, e);
    }
17720
    // Zero-masked rounding add: set mask bits take the sum (33.0), clear bits are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_add_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_maskz_add_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0.,
            33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33., 0., 33.,
        );
        assert_eq_m512h(r, e);
    }
17744
17745 #[simd_test(enable = "avx512fp16,avx512vl")]
17746 unsafe fn test_mm_add_round_sh() {
17747 let a = _mm_set_sh(1.0);
17748 let b = _mm_set_sh(2.0);
17749 let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
17750 let e = _mm_set_sh(3.0);
17751 assert_eq_m128h(r, e);
17752 }
17753
17754 #[simd_test(enable = "avx512fp16,avx512vl")]
17755 unsafe fn test_mm_mask_add_round_sh() {
17756 let a = _mm_set_sh(1.0);
17757 let b = _mm_set_sh(2.0);
17758 let src = _mm_set_sh(4.0);
17759 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17760 src, 0, a, b,
17761 );
17762 let e = _mm_set_sh(4.0);
17763 assert_eq_m128h(r, e);
17764 let r = _mm_mask_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
17765 src, 1, a, b,
17766 );
17767 let e = _mm_set_sh(3.0);
17768 assert_eq_m128h(r, e);
17769 }
17770
17771 #[simd_test(enable = "avx512fp16,avx512vl")]
17772 unsafe fn test_mm_maskz_add_round_sh() {
17773 let a = _mm_set_sh(1.0);
17774 let b = _mm_set_sh(2.0);
17775 let r =
17776 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
17777 let e = _mm_set_sh(0.0);
17778 assert_eq_m128h(r, e);
17779 let r =
17780 _mm_maskz_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
17781 let e = _mm_set_sh(3.0);
17782 assert_eq_m128h(r, e);
17783 }
17784
17785 #[simd_test(enable = "avx512fp16,avx512vl")]
17786 unsafe fn test_mm_add_sh() {
17787 let a = _mm_set_sh(1.0);
17788 let b = _mm_set_sh(2.0);
17789 let r = _mm_add_sh(a, b);
17790 let e = _mm_set_sh(3.0);
17791 assert_eq_m128h(r, e);
17792 }
17793
17794 #[simd_test(enable = "avx512fp16,avx512vl")]
17795 unsafe fn test_mm_mask_add_sh() {
17796 let a = _mm_set_sh(1.0);
17797 let b = _mm_set_sh(2.0);
17798 let src = _mm_set_sh(4.0);
17799 let r = _mm_mask_add_sh(src, 0, a, b);
17800 let e = _mm_set_sh(4.0);
17801 assert_eq_m128h(r, e);
17802 let r = _mm_mask_add_sh(src, 1, a, b);
17803 let e = _mm_set_sh(3.0);
17804 assert_eq_m128h(r, e);
17805 }
17806
17807 #[simd_test(enable = "avx512fp16,avx512vl")]
17808 unsafe fn test_mm_maskz_add_sh() {
17809 let a = _mm_set_sh(1.0);
17810 let b = _mm_set_sh(2.0);
17811 let r = _mm_maskz_add_sh(0, a, b);
17812 let e = _mm_set_sh(0.0);
17813 assert_eq_m128h(r, e);
17814 let r = _mm_maskz_add_sh(1, a, b);
17815 let e = _mm_set_sh(3.0);
17816 assert_eq_m128h(r, e);
17817 }
17818
17819 #[simd_test(enable = "avx512fp16,avx512vl")]
17820 unsafe fn test_mm_sub_ph() {
17821 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17822 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17823 let r = _mm_sub_ph(a, b);
17824 let e = _mm_set_ph(-7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0);
17825 assert_eq_m128h(r, e);
17826 }
17827
17828 #[simd_test(enable = "avx512fp16,avx512vl")]
17829 unsafe fn test_mm_mask_sub_ph() {
17830 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17831 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17832 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
17833 let r = _mm_mask_sub_ph(src, 0b01010101, a, b);
17834 let e = _mm_set_ph(10., -5., 12., -1., 14., 3., 16., 7.);
17835 assert_eq_m128h(r, e);
17836 }
17837
17838 #[simd_test(enable = "avx512fp16,avx512vl")]
17839 unsafe fn test_mm_maskz_sub_ph() {
17840 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
17841 let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
17842 let r = _mm_maskz_sub_ph(0b01010101, a, b);
17843 let e = _mm_set_ph(0., -5., 0., -1., 0., 3., 0., 7.);
17844 assert_eq_m128h(r, e);
17845 }
17846
    // Lane-wise subtract of (1..16) minus its reverse: odd values from -15 up to 15.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_sub_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let r = _mm256_sub_ph(a, b);
        let e = _mm256_set_ph(
            -15.0, -13.0, -11.0, -9.0, -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0,
            15.0,
        );
        assert_eq_m256h(r, e);
    }
17862
17863 #[simd_test(enable = "avx512fp16,avx512vl")]
17864 unsafe fn test_mm256_mask_sub_ph() {
17865 let a = _mm256_set_ph(
17866 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17867 );
17868 let b = _mm256_set_ph(
17869 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17870 );
17871 let src = _mm256_set_ph(
17872 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
17873 );
17874 let r = _mm256_mask_sub_ph(src, 0b0101010101010101, a, b);
17875 let e = _mm256_set_ph(
17876 18., -13., 20., -9., 22., -5., 24., -1., 26., 3., 28., 7., 30., 11., 32., 15.,
17877 );
17878 assert_eq_m256h(r, e);
17879 }
17880
17881 #[simd_test(enable = "avx512fp16,avx512vl")]
17882 unsafe fn test_mm256_maskz_sub_ph() {
17883 let a = _mm256_set_ph(
17884 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17885 );
17886 let b = _mm256_set_ph(
17887 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
17888 );
17889 let r = _mm256_maskz_sub_ph(0b0101010101010101, a, b);
17890 let e = _mm256_set_ph(
17891 0., -13., 0., -9., 0., -5., 0., -1., 0., 3., 0., 7., 0., 11., 0., 15.,
17892 );
17893 assert_eq_m256h(r, e);
17894 }
17895
17896 #[simd_test(enable = "avx512fp16")]
17897 unsafe fn test_mm512_sub_ph() {
17898 let a = _mm512_set_ph(
17899 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17900 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17901 31.0, 32.0,
17902 );
17903 let b = _mm512_set_ph(
17904 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17905 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17906 3.0, 2.0, 1.0,
17907 );
17908 let r = _mm512_sub_ph(a, b);
17909 let e = _mm512_set_ph(
17910 -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
17911 -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
17912 23.0, 25.0, 27.0, 29.0, 31.0,
17913 );
17914 assert_eq_m512h(r, e);
17915 }
17916
17917 #[simd_test(enable = "avx512fp16")]
17918 unsafe fn test_mm512_mask_sub_ph() {
17919 let a = _mm512_set_ph(
17920 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17921 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17922 31.0, 32.0,
17923 );
17924 let b = _mm512_set_ph(
17925 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17926 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17927 3.0, 2.0, 1.0,
17928 );
17929 let src = _mm512_set_ph(
17930 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
17931 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
17932 );
17933 let r = _mm512_mask_sub_ph(src, 0b01010101010101010101010101010101, a, b);
17934 let e = _mm512_set_ph(
17935 34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
17936 50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
17937 );
17938 assert_eq_m512h(r, e);
17939 }
17940
17941 #[simd_test(enable = "avx512fp16")]
17942 unsafe fn test_mm512_maskz_sub_ph() {
17943 let a = _mm512_set_ph(
17944 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17945 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
17946 31.0, 32.0,
17947 );
17948 let b = _mm512_set_ph(
17949 32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
17950 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
17951 3.0, 2.0, 1.0,
17952 );
17953 let r = _mm512_maskz_sub_ph(0b01010101010101010101010101010101, a, b);
17954 let e = _mm512_set_ph(
17955 0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
17956 0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
17957 );
17958 assert_eq_m512h(r, e);
17959 }
17960
    // Explicit-rounding subtract; with round-to-nearest + no-exceptions the
    // results on these exactly-representable inputs match the plain sub.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_sub_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set_ph(
            -31.0, -29.0, -27.0, -25.0, -23.0, -21.0, -19.0, -17.0, -15.0, -13.0, -11.0, -9.0,
            -7.0, -5.0, -3.0, -1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0,
            23.0, 25.0, 27.0, 29.0, 31.0,
        );
        assert_eq_m512h(r, e);
    }

    // Merge-masked rounding subtract: masked-out lanes come from `src`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_sub_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let src = _mm512_set_ph(
            34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
            51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
        );
        let r = _mm512_mask_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            34., -29., 36., -25., 38., -21., 40., -17., 42., -13., 44., -9., 46., -5., 48., -1.,
            50., 3., 52., 7., 54., 11., 56., 15., 58., 19., 60., 23., 62., 27., 64., 31.,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked rounding subtract: masked-out lanes are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_sub_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_maskz_sub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            0., -29., 0., -25., 0., -21., 0., -17., 0., -13., 0., -9., 0., -5., 0., -1., 0., 3.,
            0., 7., 0., 11., 0., 15., 0., 19., 0., 23., 0., 27., 0., 31.,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar rounding subtract: low lane = 1.0 - 2.0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_sub_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_sh(-1.0);
        assert_eq_m128h(r, e);
    }

    // Scalar merge-masked rounding subtract: mask 0 keeps `src`, mask 1 computes a - b.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_sub_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let src = _mm_set_sh(4.0);
        let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_set_sh(4.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_set_sh(-1.0);
        assert_eq_m128h(r, e);
    }

    // Scalar zero-masked rounding subtract: mask 0 zeroes the low lane, mask 1 computes a - b.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_sub_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let r =
            _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_sh(0.0);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_set_sh(-1.0);
        assert_eq_m128h(r, e);
    }
18074
18075 #[simd_test(enable = "avx512fp16,avx512vl")]
18076 unsafe fn test_mm_sub_sh() {
18077 let a = _mm_set_sh(1.0);
18078 let b = _mm_set_sh(2.0);
18079 let r = _mm_sub_sh(a, b);
18080 let e = _mm_set_sh(-1.0);
18081 assert_eq_m128h(r, e);
18082 }
18083
18084 #[simd_test(enable = "avx512fp16,avx512vl")]
18085 unsafe fn test_mm_mask_sub_sh() {
18086 let a = _mm_set_sh(1.0);
18087 let b = _mm_set_sh(2.0);
18088 let src = _mm_set_sh(4.0);
18089 let r = _mm_mask_sub_sh(src, 0, a, b);
18090 let e = _mm_set_sh(4.0);
18091 assert_eq_m128h(r, e);
18092 let r = _mm_mask_sub_sh(src, 1, a, b);
18093 let e = _mm_set_sh(-1.0);
18094 assert_eq_m128h(r, e);
18095 }
18096
18097 #[simd_test(enable = "avx512fp16,avx512vl")]
18098 unsafe fn test_mm_maskz_sub_sh() {
18099 let a = _mm_set_sh(1.0);
18100 let b = _mm_set_sh(2.0);
18101 let r = _mm_maskz_sub_sh(0, a, b);
18102 let e = _mm_set_sh(0.0);
18103 assert_eq_m128h(r, e);
18104 let r = _mm_maskz_sub_sh(1, a, b);
18105 let e = _mm_set_sh(-1.0);
18106 assert_eq_m128h(r, e);
18107 }
18108
    // Element-wise multiply, 8 lanes: a[i] * b[i] for every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mul_ph() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
        let r = _mm_mul_ph(a, b);
        let e = _mm_set_ph(8.0, 14.0, 18.0, 20.0, 20.0, 18.0, 14.0, 8.0);
        assert_eq_m128h(r, e);
    }

    // Merge-masked multiply: selected lanes get a * b, unselected lanes keep `src`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_mul_ph() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_mul_ph(src, 0b01010101, a, b);
        let e = _mm_set_ph(10., 14., 12., 20., 14., 18., 16., 8.);
        assert_eq_m128h(r, e);
    }

    // Zero-masked multiply: selected lanes get a * b, unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_mul_ph() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
        let r = _mm_maskz_mul_ph(0b01010101, a, b);
        let e = _mm_set_ph(0., 14., 0., 20., 0., 18., 0., 8.);
        assert_eq_m128h(r, e);
    }

    // 256-bit (16-lane) variant of the plain multiply.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mul_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let r = _mm256_mul_ph(a, b);
        let e = _mm256_set_ph(
            16.0, 30.0, 42.0, 52.0, 60.0, 66.0, 70.0, 72.0, 72.0, 70.0, 66.0, 60.0, 52.0, 42.0,
            30.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    // 256-bit merge-masked multiply with an alternating 16-bit mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_mul_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let src = _mm256_set_ph(
            18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
        );
        let r = _mm256_mask_mul_ph(src, 0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            18., 30., 20., 52., 22., 66., 24., 72., 26., 70., 28., 60., 30., 42., 32., 16.,
        );
        assert_eq_m256h(r, e);
    }

    // 256-bit zero-masked multiply with an alternating 16-bit mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_mul_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0,
        );
        let r = _mm256_maskz_mul_ph(0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            0., 30., 0., 52., 0., 66., 0., 72., 0., 70., 0., 60., 0., 42., 0., 16.,
        );
        assert_eq_m256h(r, e);
    }

    // 512-bit (32-lane) variant of the plain multiply.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mul_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_mul_ph(a, b);
        let e = _mm512_set_ph(
            32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
            266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
            182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }

    // 512-bit merge-masked multiply with an alternating 32-bit mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_mul_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let src = _mm512_set_ph(
            34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
            51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
        );
        let r = _mm512_mask_mul_ph(src, 0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
            50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
        );
        assert_eq_m512h(r, e);
    }

    // 512-bit zero-masked multiply with an alternating 32-bit mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_mul_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_maskz_mul_ph(0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
            270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
        );
        assert_eq_m512h(r, e);
    }
18250
    // Explicit-rounding multiply; round-to-nearest + no-exceptions matches the
    // plain multiply for these exactly-representable products.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mul_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set_ph(
            32.0, 62.0, 90.0, 116.0, 140.0, 162.0, 182.0, 200.0, 216.0, 230.0, 242.0, 252.0, 260.0,
            266.0, 270.0, 272.0, 272.0, 270.0, 266.0, 260.0, 252.0, 242.0, 230.0, 216.0, 200.0,
            182.0, 162.0, 140.0, 116.0, 90.0, 62.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }

    // Merge-masked rounding multiply: masked-out lanes come from `src`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_mul_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let src = _mm512_set_ph(
            34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.,
            51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
        );
        let r = _mm512_mask_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            34., 62., 36., 116., 38., 162., 40., 200., 42., 230., 44., 252., 46., 266., 48., 272.,
            50., 270., 52., 260., 54., 242., 56., 216., 58., 182., 60., 140., 62., 90., 64., 32.,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked rounding multiply: masked-out lanes are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_mul_round_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            32.0, 31.0, 30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0,
            18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0,
            3.0, 2.0, 1.0,
        );
        let r = _mm512_maskz_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            0., 62., 0., 116., 0., 162., 0., 200., 0., 230., 0., 252., 0., 266., 0., 272., 0.,
            270., 0., 260., 0., 242., 0., 216., 0., 182., 0., 140., 0., 90., 0., 32.,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar rounding multiply: low lane = 1.0 * 2.0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mul_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_sh(2.0);
        assert_eq_m128h(r, e);
    }

    // Scalar merge-masked rounding multiply: mask 0 keeps `src`, mask 1 computes a * b.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_mul_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let src = _mm_set_sh(4.0);
        let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_set_sh(4.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_set_sh(2.0);
        assert_eq_m128h(r, e);
    }

    // Scalar zero-masked rounding multiply: mask 0 zeroes the low lane, mask 1 computes a * b.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_mul_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let r =
            _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_sh(0.0);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_set_sh(2.0);
        assert_eq_m128h(r, e);
    }
18364
18365 #[simd_test(enable = "avx512fp16,avx512vl")]
18366 unsafe fn test_mm_mul_sh() {
18367 let a = _mm_set_sh(1.0);
18368 let b = _mm_set_sh(2.0);
18369 let r = _mm_mul_sh(a, b);
18370 let e = _mm_set_sh(2.0);
18371 assert_eq_m128h(r, e);
18372 }
18373
18374 #[simd_test(enable = "avx512fp16,avx512vl")]
18375 unsafe fn test_mm_mask_mul_sh() {
18376 let a = _mm_set_sh(1.0);
18377 let b = _mm_set_sh(2.0);
18378 let src = _mm_set_sh(4.0);
18379 let r = _mm_mask_mul_sh(src, 0, a, b);
18380 let e = _mm_set_sh(4.0);
18381 assert_eq_m128h(r, e);
18382 let r = _mm_mask_mul_sh(src, 1, a, b);
18383 let e = _mm_set_sh(2.0);
18384 assert_eq_m128h(r, e);
18385 }
18386
18387 #[simd_test(enable = "avx512fp16,avx512vl")]
18388 unsafe fn test_mm_maskz_mul_sh() {
18389 let a = _mm_set_sh(1.0);
18390 let b = _mm_set_sh(2.0);
18391 let r = _mm_maskz_mul_sh(0, a, b);
18392 let e = _mm_set_sh(0.0);
18393 assert_eq_m128h(r, e);
18394 let r = _mm_maskz_mul_sh(1, a, b);
18395 let e = _mm_set_sh(2.0);
18396 assert_eq_m128h(r, e);
18397 }
18398
    // Element-wise divide, 8 lanes: every lane is 1.0 / 2.0 == 0.5.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_div_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let r = _mm_div_ph(a, b);
        let e = _mm_set1_ph(0.5);
        assert_eq_m128h(r, e);
    }

    // Merge-masked divide: selected lanes get a / b, unselected lanes keep `src`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_div_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
        let r = _mm_mask_div_ph(src, 0b01010101, a, b);
        let e = _mm_set_ph(4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5);
        assert_eq_m128h(r, e);
    }

    // Zero-masked divide: selected lanes get a / b, unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_div_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let r = _mm_maskz_div_ph(0b01010101, a, b);
        let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
        assert_eq_m128h(r, e);
    }

    // 256-bit (16-lane) variant of the plain divide.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_div_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let r = _mm256_div_ph(a, b);
        let e = _mm256_set1_ph(0.5);
        assert_eq_m256h(r, e);
    }

    // 256-bit merge-masked divide with an alternating 16-bit mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_div_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let src = _mm256_set_ph(
            4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
            19.0,
        );
        let r = _mm256_mask_div_ph(src, 0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
        );
        assert_eq_m256h(r, e);
    }

    // 256-bit zero-masked divide with an alternating 16-bit mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_div_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m256h(r, e);
    }

    // 512-bit (32-lane) variant of the plain divide.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_div_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let r = _mm512_div_ph(a, b);
        let e = _mm512_set1_ph(0.5);
        assert_eq_m512h(r, e);
    }

    // 512-bit merge-masked divide with an alternating 32-bit mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_div_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let src = _mm512_set_ph(
            4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
            19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
            33.0, 34.0, 35.0,
        );
        let r = _mm512_mask_div_ph(src, 0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
            20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }

    // 512-bit zero-masked divide with an alternating 32-bit mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_div_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
            0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }
18499
    // Explicit-rounding divide; round-to-nearest + no-exceptions matches the
    // plain divide for the exactly-representable quotient 0.5.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_div_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ph(0.5);
        assert_eq_m512h(r, e);
    }

    // Merge-masked rounding divide: masked-out lanes come from `src`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_div_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let src = _mm512_set_ph(
            4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0,
            19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0,
            33.0, 34.0, 35.0,
        );
        let r = _mm512_mask_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            4.0, 0.5, 6.0, 0.5, 8.0, 0.5, 10.0, 0.5, 12.0, 0.5, 14.0, 0.5, 16.0, 0.5, 18.0, 0.5,
            20.0, 0.5, 22.0, 0.5, 24.0, 0.5, 26.0, 0.5, 28.0, 0.5, 30.0, 0.5, 32.0, 0.5, 34.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked rounding divide: masked-out lanes are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_div_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
            0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar rounding divide: low lane = 1.0 / 2.0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_div_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_sh(0.5);
        assert_eq_m128h(r, e);
    }

    // Scalar merge-masked rounding divide: mask 0 keeps `src`, mask 1 computes a / b.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_div_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let src = _mm_set_sh(4.0);
        let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_set_sh(4.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_set_sh(0.5);
        assert_eq_m128h(r, e);
    }

    // Scalar zero-masked rounding divide: mask 0 zeroes the low lane, mask 1 computes a / b.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_div_round_sh() {
        let a = _mm_set_sh(1.0);
        let b = _mm_set_sh(2.0);
        let r =
            _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_sh(0.0);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_set_sh(0.5);
        assert_eq_m128h(r, e);
    }
18586
18587 #[simd_test(enable = "avx512fp16,avx512vl")]
18588 unsafe fn test_mm_div_sh() {
18589 let a = _mm_set_sh(1.0);
18590 let b = _mm_set_sh(2.0);
18591 let r = _mm_div_sh(a, b);
18592 let e = _mm_set_sh(0.5);
18593 assert_eq_m128h(r, e);
18594 }
18595
18596 #[simd_test(enable = "avx512fp16,avx512vl")]
18597 unsafe fn test_mm_mask_div_sh() {
18598 let a = _mm_set_sh(1.0);
18599 let b = _mm_set_sh(2.0);
18600 let src = _mm_set_sh(4.0);
18601 let r = _mm_mask_div_sh(src, 0, a, b);
18602 let e = _mm_set_sh(4.0);
18603 assert_eq_m128h(r, e);
18604 let r = _mm_mask_div_sh(src, 1, a, b);
18605 let e = _mm_set_sh(0.5);
18606 assert_eq_m128h(r, e);
18607 }
18608
18609 #[simd_test(enable = "avx512fp16,avx512vl")]
18610 unsafe fn test_mm_maskz_div_sh() {
18611 let a = _mm_set_sh(1.0);
18612 let b = _mm_set_sh(2.0);
18613 let r = _mm_maskz_div_sh(0, a, b);
18614 let e = _mm_set_sh(0.0);
18615 assert_eq_m128h(r, e);
18616 let r = _mm_maskz_div_sh(1, a, b);
18617 let e = _mm_set_sh(0.5);
18618 assert_eq_m128h(r, e);
18619 }
18620
18621 #[simd_test(enable = "avx512fp16,avx512vl")]
18622 unsafe fn test_mm_mul_pch() {
18623 let a = _mm_set1_pch(0.0, 1.0);
18624 let b = _mm_set1_pch(0.0, 1.0);
18625 let r = _mm_mul_pch(a, b);
18626 let e = _mm_set1_pch(-1.0, 0.0);
18627 assert_eq_m128h(r, e);
18628 }
18629
18630 #[simd_test(enable = "avx512fp16,avx512vl")]
18631 unsafe fn test_mm_mask_mul_pch() {
18632 let a = _mm_set1_pch(0.0, 1.0);
18633 let b = _mm_set1_pch(0.0, 1.0);
18634 let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
18635 let r = _mm_mask_mul_pch(src, 0b0101, a, b);
18636 let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
18637 assert_eq_m128h(r, e);
18638 }
18639
18640 #[simd_test(enable = "avx512fp16,avx512vl")]
18641 unsafe fn test_mm_maskz_mul_pch() {
18642 let a = _mm_set1_pch(0.0, 1.0);
18643 let b = _mm_set1_pch(0.0, 1.0);
18644 let r = _mm_maskz_mul_pch(0b0101, a, b);
18645 let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
18646 assert_eq_m128h(r, e);
18647 }
18648
18649 #[simd_test(enable = "avx512fp16,avx512vl")]
18650 unsafe fn test_mm256_mul_pch() {
18651 let a = _mm256_set1_pch(0.0, 1.0);
18652 let b = _mm256_set1_pch(0.0, 1.0);
18653 let r = _mm256_mul_pch(a, b);
18654 let e = _mm256_set1_pch(-1.0, 0.0);
18655 assert_eq_m256h(r, e);
18656 }
18657
18658 #[simd_test(enable = "avx512fp16,avx512vl")]
18659 unsafe fn test_mm256_mask_mul_pch() {
18660 let a = _mm256_set1_pch(0.0, 1.0);
18661 let b = _mm256_set1_pch(0.0, 1.0);
18662 let src = _mm256_setr_ph(
18663 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
18664 );
18665 let r = _mm256_mask_mul_pch(src, 0b01010101, a, b);
18666 let e = _mm256_setr_ph(
18667 -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
18668 );
18669 assert_eq_m256h(r, e);
18670 }
18671
18672 #[simd_test(enable = "avx512fp16,avx512vl")]
18673 unsafe fn test_mm256_maskz_mul_pch() {
18674 let a = _mm256_set1_pch(0.0, 1.0);
18675 let b = _mm256_set1_pch(0.0, 1.0);
18676 let r = _mm256_maskz_mul_pch(0b01010101, a, b);
18677 let e = _mm256_setr_ph(
18678 -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
18679 );
18680 assert_eq_m256h(r, e);
18681 }
18682
18683 #[simd_test(enable = "avx512fp16")]
18684 unsafe fn test_mm512_mul_pch() {
18685 let a = _mm512_set1_pch(0.0, 1.0);
18686 let b = _mm512_set1_pch(0.0, 1.0);
18687 let r = _mm512_mul_pch(a, b);
18688 let e = _mm512_set1_pch(-1.0, 0.0);
18689 assert_eq_m512h(r, e);
18690 }
18691
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_mul_pch() {
        // (0 + 1i)^2 = -1 per complex lane; masked-off lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        // 16 mask bits, one per complex lane (pair of f16 elements).
        let r = _mm512_mask_mul_pch(src, 0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
18709
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_mul_pch() {
        // (0 + 1i)^2 = -1 per complex lane; masked-off lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
18721
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mul_round_pch() {
        // Same as mul_pch but with explicit rounding control (round-to-nearest, no exceptions).
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
18730
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_mul_round_pch() {
        // Masked multiply with explicit rounding; masked-off complex lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
18753
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_mul_round_pch() {
        // Masked multiply with explicit rounding; masked-off complex lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
18769
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mul_round_sch() {
        // Scalar complex multiply with rounding: elements 0-1 hold (0 + 1i)^2 = -1;
        // elements 2-7 are copied from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18778
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_mul_round_sch() {
        // Mask bit 0 is clear, so the scalar lane (elements 0-1) comes from src;
        // elements 2-7 are always copied from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18790
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_mul_round_sch() {
        // Mask bit 0 is clear, so the scalar lane (elements 0-1) is zeroed;
        // elements 2-7 are always copied from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r =
            _mm_maskz_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18800
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mul_sch() {
        // Scalar complex multiply: elements 0-1 hold (0 + 1i)^2 = -1; elements 2-7 copy from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r = _mm_mul_sch(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18809
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_mul_sch() {
        // Mask bit 0 clear: scalar lane from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_mul_sch(src, 0, a, b);
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18819
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_mul_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r = _mm_maskz_mul_sch(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18828
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmul_pch() {
        // fmul_pch is the alias of mul_pch: (0 + 1i)^2 = (-1 + 0i) per complex lane.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 1.0);
        let r = _mm_fmul_pch(a, b);
        let e = _mm_set1_pch(-1.0, 0.0);
        assert_eq_m128h(r, e);
    }
18837
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmul_pch() {
        // Masked fmul: selected complex lanes hold -1; masked-off lanes pass src through.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 1.0);
        let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
        let r = _mm_mask_fmul_pch(src, 0b0101, a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
        assert_eq_m128h(r, e);
    }
18847
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmul_pch() {
        // Masked fmul: selected complex lanes hold -1; masked-off lanes are zeroed.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 1.0);
        let r = _mm_maskz_fmul_pch(0b0101, a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
        assert_eq_m128h(r, e);
    }
18856
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fmul_pch() {
        // fmul_pch alias of mul_pch: (0 + 1i)^2 = -1 per complex lane, 256-bit width.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 1.0);
        let r = _mm256_fmul_pch(a, b);
        let e = _mm256_set1_pch(-1.0, 0.0);
        assert_eq_m256h(r, e);
    }
18865
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fmul_pch() {
        // Masked fmul, 256-bit: masked-off complex lanes pass src through.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 1.0);
        let src = _mm256_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
        );
        let r = _mm256_mask_fmul_pch(src, 0b01010101, a, b);
        let e = _mm256_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
        );
        assert_eq_m256h(r, e);
    }
18879
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fmul_pch() {
        // Masked fmul, 256-bit: masked-off complex lanes are zeroed.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 1.0);
        let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
        let e = _mm256_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m256h(r, e);
    }
18890
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmul_pch() {
        // fmul_pch alias of mul_pch: (0 + 1i)^2 = -1 per complex lane, 512-bit width.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_fmul_pch(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
18899
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmul_pch() {
        // Masked fmul, 512-bit: masked-off complex lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_fmul_pch(src, 0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
18917
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmul_pch() {
        // Masked fmul, 512-bit: masked-off complex lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
18929
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmul_round_pch() {
        // fmul with explicit rounding (round-to-nearest, no exceptions): (0 + 1i)^2 = -1.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
18938
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmul_round_pch() {
        // Masked fmul with explicit rounding; masked-off complex lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
18961
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmul_round_pch() {
        // Masked fmul with explicit rounding; masked-off complex lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
18977
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmul_round_sch() {
        // Scalar fmul with explicit rounding: elements 0-1 = (0 + 1i)^2 = -1; 2-7 copy from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18986
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmul_round_sch() {
        // Mask bit 0 clear: scalar lane (elements 0-1) from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
18998
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmul_round_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r =
            _mm_maskz_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19008
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmul_sch() {
        // fmul_sch alias of mul_sch: elements 0-1 = (0 + 1i)^2 = -1; 2-7 copy from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r = _mm_fmul_sch(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19017
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmul_sch() {
        // Mask bit 0 clear: scalar lane from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_fmul_sch(src, 0, a, b);
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19027
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmul_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let r = _mm_maskz_fmul_sch(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19036
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cmul_pch() {
        // cmul multiplies a by the conjugate of b:
        // (0 + 1i) * conj(0 - 1i) = (0 + 1i) * (0 + 1i) = (-1 + 0i).
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, -1.0);
        let r = _mm_cmul_pch(a, b);
        let e = _mm_set1_pch(-1.0, 0.0);
        assert_eq_m128h(r, e);
    }
19045
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cmul_pch() {
        // Masked conjugate-multiply: masked-off complex lanes pass src through.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, -1.0);
        let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
        let r = _mm_mask_cmul_pch(src, 0b0101, a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
        assert_eq_m128h(r, e);
    }
19055
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cmul_pch() {
        // Masked conjugate-multiply: masked-off complex lanes are zeroed.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, -1.0);
        let r = _mm_maskz_cmul_pch(0b0101, a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
        assert_eq_m128h(r, e);
    }
19064
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cmul_pch() {
        // (0 + 1i) * conj(0 - 1i) = -1 per complex lane, 256-bit width.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, -1.0);
        let r = _mm256_cmul_pch(a, b);
        let e = _mm256_set1_pch(-1.0, 0.0);
        assert_eq_m256h(r, e);
    }
19073
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cmul_pch() {
        // Masked conjugate-multiply, 256-bit: masked-off complex lanes pass src through.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, -1.0);
        let src = _mm256_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
        );
        let r = _mm256_mask_cmul_pch(src, 0b01010101, a, b);
        let e = _mm256_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
        );
        assert_eq_m256h(r, e);
    }
19087
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cmul_pch() {
        // Masked conjugate-multiply, 256-bit: masked-off complex lanes are zeroed.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, -1.0);
        let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
        let e = _mm256_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m256h(r, e);
    }
19098
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cmul_pch() {
        // (0 + 1i) * conj(0 - 1i) = -1 per complex lane, 512-bit width.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_cmul_pch(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
19107
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cmul_pch() {
        // Masked conjugate-multiply, 512-bit: masked-off complex lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_cmul_pch(src, 0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
19125
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cmul_pch() {
        // Masked conjugate-multiply, 512-bit: masked-off complex lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19137
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cmul_round_pch() {
        // Conjugate-multiply with explicit rounding (round-to-nearest, no exceptions).
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
19146
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cmul_round_pch() {
        // Masked conjugate-multiply with explicit rounding; masked-off lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
19169
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cmul_round_pch() {
        // Masked conjugate-multiply with explicit rounding; masked-off lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19185
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cmul_sch() {
        // Scalar conjugate-multiply: elements 0-1 = (0 + 1i) * conj(0 - 1i) = -1;
        // elements 2-7 are copied from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r = _mm_cmul_sch(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19194
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cmul_sch() {
        // Mask bit 0 clear: scalar lane from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_cmul_sch(src, 0, a, b);
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19204
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cmul_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r = _mm_maskz_cmul_sch(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19213
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cmul_round_sch() {
        // Scalar conjugate-multiply with explicit rounding: elements 0-1 = -1; 2-7 from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19222
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cmul_round_sch() {
        // Mask bit 0 clear: scalar lane from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19234
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cmul_round_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r =
            _mm_maskz_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19244
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fcmul_pch() {
        // fcmul_pch is the alias of cmul_pch: a * conj(b) = (0 + 1i)*(0 + 1i) = -1 per lane.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, -1.0);
        let r = _mm_fcmul_pch(a, b);
        let e = _mm_set1_pch(-1.0, 0.0);
        assert_eq_m128h(r, e);
    }
19253
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fcmul_pch() {
        // Masked fcmul: masked-off complex lanes pass src through.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, -1.0);
        let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
        let r = _mm_mask_fcmul_pch(src, 0b0101, a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0);
        assert_eq_m128h(r, e);
    }
19263
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fcmul_pch() {
        // Masked fcmul: masked-off complex lanes are zeroed.
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, -1.0);
        let r = _mm_maskz_fcmul_pch(0b0101, a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0);
        assert_eq_m128h(r, e);
    }
19272
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fcmul_pch() {
        // fcmul alias of cmul: a * conj(b) = -1 per complex lane, 256-bit width.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, -1.0);
        let r = _mm256_fcmul_pch(a, b);
        let e = _mm256_set1_pch(-1.0, 0.0);
        assert_eq_m256h(r, e);
    }
19281
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fcmul_pch() {
        // Masked fcmul, 256-bit: masked-off complex lanes pass src through.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, -1.0);
        let src = _mm256_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
        );
        let r = _mm256_mask_fcmul_pch(src, 0b01010101, a, b);
        let e = _mm256_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
        );
        assert_eq_m256h(r, e);
    }
19295
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fcmul_pch() {
        // Masked fcmul, 256-bit: masked-off complex lanes are zeroed.
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, -1.0);
        let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
        let e = _mm256_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m256h(r, e);
    }
19306
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fcmul_pch() {
        // fcmul alias of cmul: a * conj(b) = -1 per complex lane, 512-bit width.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_fcmul_pch(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
19315
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fcmul_pch() {
        // Masked fcmul, 512-bit: masked-off complex lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_fcmul_pch(src, 0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
19333
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fcmul_pch() {
        // Masked fcmul, 512-bit: masked-off complex lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19345
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fcmul_round_pch() {
        // fcmul with explicit rounding (round-to-nearest, no exceptions).
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_pch(-1.0, 0.0);
        assert_eq_m512h(r, e);
    }
19354
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fcmul_round_pch() {
        // Masked fcmul with explicit rounding; masked-off complex lanes pass src through.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 4.0, 5.0, -1.0, 0.0, 8.0, 9.0, -1.0, 0.0, 12.0, 13.0, -1.0, 0.0, 16.0, 17.0,
            -1.0, 0.0, 20.0, 21.0, -1.0, 0.0, 24.0, 25.0, -1.0, 0.0, 28.0, 29.0, -1.0, 0.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
19377
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fcmul_round_pch() {
        // Masked fcmul with explicit rounding; masked-off complex lanes are zeroed.
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, -1.0);
        let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
            b,
        );
        let e = _mm512_setr_ph(
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
            -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19393
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fcmul_sch() {
        // Scalar fcmul: elements 0-1 = (0 + 1i) * conj(0 - 1i) = -1; 2-7 copy from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r = _mm_fcmul_sch(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19402
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fcmul_sch() {
        // Mask bit 0 clear: scalar lane from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_fcmul_sch(src, 0, a, b);
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19412
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fcmul_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r = _mm_maskz_fcmul_sch(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19421
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fcmul_round_sch() {
        // Scalar fcmul with explicit rounding: elements 0-1 = -1; 2-7 from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(-1.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19430
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fcmul_round_sch() {
        // Mask bit 0 clear: scalar lane from src; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
        let r = _mm_mask_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(14.0, 15.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19442
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fcmul_round_sch() {
        // Mask bit 0 clear: scalar lane zeroed; elements 2-7 always from a.
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
        let r =
            _mm_maskz_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19452
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_abs_ph() {
        // Absolute value clears the sign of each f16 element independently.
        let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
        let r = _mm_abs_ph(a);
        let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
        assert_eq_m128h(r, e);
    }
19460
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_abs_ph() {
        // Absolute value clears the sign of each f16 element, 256-bit width.
        let a = _mm256_set_ph(
            -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
            -14.0,
        );
        let r = _mm256_abs_ph(a);
        let e = _mm256_set_ph(
            1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
        );
        assert_eq_m256h(r, e);
    }
19473
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_abs_ph() {
        // Absolute value clears the sign of each f16 element, 512-bit width.
        let a = _mm512_set_ph(
            -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
            -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
            27.0, -28.0, 29.0, -30.0,
        );
        let r = _mm512_abs_ph(a);
        let e = _mm512_set_ph(
            1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
            15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0,
            29.0, 30.0,
        );
        assert_eq_m512h(r, e);
    }
19489
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_conj_pch() {
        // Complex conjugate negates the imaginary (odd-index) element of each lane.
        let a = _mm_set1_pch(0.0, 1.0);
        let r = _mm_conj_pch(a);
        let e = _mm_set1_pch(0.0, -1.0);
        assert_eq_m128h(r, e);
    }
19497
    // Masked conjugate: mask 0b0101 selects complex pairs 0 and 2; masked-off
    // pairs keep the corresponding lanes of `src`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_conj_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
        let r = _mm_mask_conj_pch(src, 0b0101, a);
        let e = _mm_setr_ph(0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0);
        assert_eq_m128h(r, e);
    }
19506
    // Zero-masked conjugate: masked-off complex pairs are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_conj_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let r = _mm_maskz_conj_pch(0b0101, a);
        let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
        assert_eq_m128h(r, e);
    }
19514
    // _mm256_conj_pch: complex conjugate on 8 (re, im) pairs.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_conj_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let r = _mm256_conj_pch(a);
        let e = _mm256_set1_pch(0.0, -1.0);
        assert_eq_m256h(r, e);
    }
19522
    // Masked 256-bit conjugate: masked-off complex pairs keep `src` lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_conj_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let src = _mm256_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
        );
        let r = _mm256_mask_conj_pch(src, 0b01010101, a);
        let e = _mm256_setr_ph(
            0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
        );
        assert_eq_m256h(r, e);
    }
19535
    // Zero-masked 256-bit conjugate: masked-off complex pairs are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_conj_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let r = _mm256_maskz_conj_pch(0b01010101, a);
        let e = _mm256_setr_ph(
            0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
        );
        assert_eq_m256h(r, e);
    }
19545
    // _mm512_conj_pch: complex conjugate on 16 (re, im) pairs.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_conj_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_conj_pch(a);
        let e = _mm512_set1_pch(0.0, -1.0);
        assert_eq_m512h(r, e);
    }
19553
    // Masked 512-bit conjugate: masked-off complex pairs keep `src` lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_conj_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let src = _mm512_setr_ph(
            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
            18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
            32.0, 33.0,
        );
        let r = _mm512_mask_conj_pch(src, 0b0101010101010101, a);
        let e = _mm512_setr_ph(
            0.0, -1.0, 4.0, 5.0, 0.0, -1.0, 8.0, 9.0, 0.0, -1.0, 12.0, 13.0, 0.0, -1.0, 16.0, 17.0,
            0.0, -1.0, 20.0, 21.0, 0.0, -1.0, 24.0, 25.0, 0.0, -1.0, 28.0, 29.0, 0.0, -1.0, 32.0,
            33.0,
        );
        assert_eq_m512h(r, e);
    }
19570
    // Zero-masked 512-bit conjugate: masked-off complex pairs are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_conj_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
        let e = _mm512_setr_ph(
            0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
            0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19581
    // Complex FMA per (re, im) pair: (0+1i)*(0+2i) + (0+3i) = -2+3i.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_fmadd_pch(a, b, c);
        let e = _mm_set1_pch(-2.0, 3.0);
        assert_eq_m128h(r, e);
    }
19591
    // Masked complex FMA: computed pairs give -2+3i; masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_mask_fmadd_pch(a, 0b0101, b, c);
        let e = _mm_setr_ph(-2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0);
        assert_eq_m128h(r, e);
    }
19601
    // mask3 complex FMA: masked-off pairs keep `c` (0+3i) instead of `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_mask3_fmadd_pch(a, b, c, 0b0101);
        let e = _mm_setr_ph(-2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0);
        assert_eq_m128h(r, e);
    }
19611
    // Zero-masked complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_maskz_fmadd_pch(0b0101, a, b, c);
        let e = _mm_setr_ph(-2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0);
        assert_eq_m128h(r, e);
    }
19621
    // 256-bit complex FMA: every pair yields (0+1i)*(0+2i) + (0+3i) = -2+3i.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_fmadd_pch(a, b, c);
        let e = _mm256_set1_pch(-2.0, 3.0);
        assert_eq_m256h(r, e);
    }
19631
    // Masked 256-bit complex FMA: masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_mask_fmadd_pch(a, 0b01010101, b, c);
        let e = _mm256_setr_ph(
            -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }
19643
    // mask3 256-bit complex FMA: masked-off pairs keep `c` (0+3i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_mask3_fmadd_pch(a, b, c, 0b01010101);
        let e = _mm256_setr_ph(
            -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
        );
        assert_eq_m256h(r, e);
    }
19655
    // Zero-masked 256-bit complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_maskz_fmadd_pch(0b01010101, a, b, c);
        let e = _mm256_setr_ph(
            -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
        );
        assert_eq_m256h(r, e);
    }
19667
    // 512-bit complex FMA: every pair yields (0+1i)*(0+2i) + (0+3i) = -2+3i.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_fmadd_pch(a, b, c);
        let e = _mm512_set1_pch(-2.0, 3.0);
        assert_eq_m512h(r, e);
    }
19677
    // Masked 512-bit complex FMA: masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask_fmadd_pch(a, 0b0101010101010101, b, c);
        let e = _mm512_setr_ph(
            -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
            -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
19690
    // mask3 512-bit complex FMA: masked-off pairs keep `c` (0+3i).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask3_fmadd_pch(a, b, c, 0b0101010101010101);
        let e = _mm512_setr_ph(
            -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
            -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
        );
        assert_eq_m512h(r, e);
    }
19703
    // Zero-masked 512-bit complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_maskz_fmadd_pch(0b0101010101010101, a, b, c);
        let e = _mm512_setr_ph(
            -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
            -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19716
    // Complex FMA with explicit rounding control (round-to-nearest, exceptions suppressed).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r =
            _mm512_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_pch(-2.0, 3.0);
        assert_eq_m512h(r, e);
    }
19727
    // Masked rounding-control complex FMA: masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b0101010101010101,
            b,
            c,
        );
        let e = _mm512_setr_ph(
            -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
            -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0, -2.0, 3.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
19745
    // mask3 rounding-control complex FMA: masked-off pairs keep `c` (0+3i).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask3_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b0101010101010101,
        );
        let e = _mm512_setr_ph(
            -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
            -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0, -2.0, 3.0, 0.0, 3.0,
        );
        assert_eq_m512h(r, e);
    }
19763
    // Zero-masked rounding-control complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_maskz_fmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
            b,
            c,
        );
        let e = _mm512_setr_ph(
            -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
            -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0, -2.0, 3.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
19781
    // Scalar complex FMA (lanes 0-1 only): (0+1i)*(0+2i) + (0+3i) = -2+3i;
    // upper lanes 2-7 pass through from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_fmadd_sch(a, b, c);
        let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19791
    // Masked scalar complex FMA: mask 0 leaves lanes 0-1 as `a`; mask 1 computes
    // -2+3i. Upper lanes always pass through from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask_fmadd_sch(a, 0, b, c);
        let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_fmadd_sch(a, 1, b, c);
        let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19804
    // mask3 scalar complex FMA: mask 0 leaves lanes 0-1 as `c`; upper lanes pass
    // through from `c` (not `a`) in the mask3 form.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask3_fmadd_sch(a, b, c, 0);
        let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask3_fmadd_sch(a, b, c, 1);
        let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
    }
19817
    // Zero-masked scalar complex FMA: mask 0 zeroes lanes 0-1; upper lanes pass
    // through from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_maskz_fmadd_sch(0, a, b, c);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fmadd_sch(1, a, b, c);
        let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19830
    // Scalar complex FMA with explicit rounding control; same expectation as the
    // non-round variant.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19840
    // Masked rounding-control scalar complex FMA: checks both mask=0 (keep `a`)
    // and mask=1 (compute -2+3i) paths.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 1, b, c,
        );
        let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19857
    // mask3 rounding-control scalar complex FMA: masked-off result comes from `c`;
    // upper lanes pass through from `c`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask3_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 1,
        );
        let e = _mm_setr_ph(-2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
    }
19874
    // Zero-masked rounding-control scalar complex FMA: mask 0 zeroes lanes 0-1.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            1, a, b, c,
        );
        let e = _mm_setr_ph(-2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
19891
    // Conjugate complex FMA: conj(0+1i)*(0+2i) + (0+3i) = 2+3i per pair.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fcmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_fcmadd_pch(a, b, c);
        let e = _mm_set1_pch(2.0, 3.0);
        assert_eq_m128h(r, e);
    }
19901
    // Masked conjugate complex FMA: masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fcmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_mask_fcmadd_pch(a, 0b0101, b, c);
        let e = _mm_setr_ph(2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0);
        assert_eq_m128h(r, e);
    }
19911
    // mask3 conjugate complex FMA: masked-off pairs keep `c` (0+3i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fcmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_mask3_fcmadd_pch(a, b, c, 0b0101);
        let e = _mm_setr_ph(2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0);
        assert_eq_m128h(r, e);
    }
19921
    // Zero-masked conjugate complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fcmadd_pch() {
        let a = _mm_set1_pch(0.0, 1.0);
        let b = _mm_set1_pch(0.0, 2.0);
        let c = _mm_set1_pch(0.0, 3.0);
        let r = _mm_maskz_fcmadd_pch(0b0101, a, b, c);
        let e = _mm_setr_ph(2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0);
        assert_eq_m128h(r, e);
    }
19931
    // 256-bit conjugate complex FMA: every pair yields conj(0+1i)*(0+2i)+(0+3i) = 2+3i.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fcmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_fcmadd_pch(a, b, c);
        let e = _mm256_set1_pch(2.0, 3.0);
        assert_eq_m256h(r, e);
    }
19941
    // Masked 256-bit conjugate complex FMA: masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fcmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_mask_fcmadd_pch(a, 0b01010101, b, c);
        let e = _mm256_setr_ph(
            2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }
19953
    // mask3 256-bit conjugate complex FMA: masked-off pairs keep `c` (0+3i).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fcmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_mask3_fcmadd_pch(a, b, c, 0b01010101);
        let e = _mm256_setr_ph(
            2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
        );
        assert_eq_m256h(r, e);
    }
19965
    // Zero-masked 256-bit conjugate complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fcmadd_pch() {
        let a = _mm256_set1_pch(0.0, 1.0);
        let b = _mm256_set1_pch(0.0, 2.0);
        let c = _mm256_set1_pch(0.0, 3.0);
        let r = _mm256_maskz_fcmadd_pch(0b01010101, a, b, c);
        let e = _mm256_setr_ph(
            2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
        );
        assert_eq_m256h(r, e);
    }
19977
    // 512-bit conjugate complex FMA: every pair yields conj(0+1i)*(0+2i)+(0+3i) = 2+3i.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fcmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_fcmadd_pch(a, b, c);
        let e = _mm512_set1_pch(2.0, 3.0);
        assert_eq_m512h(r, e);
    }
19987
    // Masked 512-bit conjugate complex FMA: masked-off pairs keep `a` (0+1i).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fcmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask_fcmadd_pch(a, 0b0101010101010101, b, c);
        let e = _mm512_setr_ph(
            2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
            3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
20000
    // mask3 512-bit conjugate complex FMA: masked-off pairs keep `c` (0+3i).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fcmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask3_fcmadd_pch(a, b, c, 0b0101010101010101);
        let e = _mm512_setr_ph(
            2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
            3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
        );
        assert_eq_m512h(r, e);
    }
20013
    // Zero-masked 512-bit conjugate complex FMA: masked-off pairs are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fcmadd_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_maskz_fcmadd_pch(0b0101010101010101, a, b, c);
        let e = _mm512_setr_ph(
            2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
            3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
20026
    // Conjugate complex FMA with explicit rounding control; same result as the
    // non-round variant.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fcmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r =
            _mm512_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_pch(2.0, 3.0);
        assert_eq_m512h(r, e);
    }
20037
    // Masked rounding-control conjugate complex FMA: masked-off pairs keep `a`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fcmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b0101010101010101,
            b,
            c,
        );
        let e = _mm512_setr_ph(
            2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0,
            3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
20055
    // mask3 rounding-control conjugate complex FMA: masked-off pairs keep `c`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fcmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_mask3_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b0101010101010101,
        );
        let e = _mm512_setr_ph(
            2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0,
            3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0, 2.0, 3.0, 0.0, 3.0,
        );
        assert_eq_m512h(r, e);
    }
20073
    // Zero-masked rounding-control conjugate complex FMA: masked-off pairs zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fcmadd_round_pch() {
        let a = _mm512_set1_pch(0.0, 1.0);
        let b = _mm512_set1_pch(0.0, 2.0);
        let c = _mm512_set1_pch(0.0, 3.0);
        let r = _mm512_maskz_fcmadd_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
            b,
            c,
        );
        let e = _mm512_setr_ph(
            2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0,
            3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0, 2.0, 3.0, 0.0, 0.0,
        );
        assert_eq_m512h(r, e);
    }
20091
    // Scalar conjugate complex FMA (lanes 0-1): conj(0+1i)*(0+2i)+(0+3i) = 2+3i;
    // upper lanes pass through from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fcmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_fcmadd_sch(a, b, c);
        let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
20101
    // Masked scalar conjugate complex FMA: mask 0 leaves lanes 0-1 as `a`;
    // mask 1 computes 2+3i.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fcmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask_fcmadd_sch(a, 0, b, c);
        let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_fcmadd_sch(a, 1, b, c);
        let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
20114
    // mask3 scalar conjugate complex FMA: masked-off result and upper lanes come
    // from `c`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fcmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask3_fcmadd_sch(a, b, c, 0);
        let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask3_fcmadd_sch(a, b, c, 1);
        let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
    }
20127
    // Zero-masked scalar conjugate complex FMA: mask 0 zeroes lanes 0-1.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fcmadd_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_maskz_fcmadd_sch(0, a, b, c);
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fcmadd_sch(1, a, b, c);
        let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
20140
    // Scalar conjugate complex FMA with explicit rounding control.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fcmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
20150
    // Masked rounding-control scalar conjugate complex FMA: checks both
    // mask=0 (keep `a`) and mask=1 (compute 2+3i) paths.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fcmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        let e = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 1, b, c,
        );
        let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
20167
    // mask3 rounding-control scalar conjugate complex FMA: masked-off result and
    // upper lanes come from `c`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fcmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        let e = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask3_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 1,
        );
        let e = _mm_setr_ph(2.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        assert_eq_m128h(r, e);
    }
20184
    // Zero-masked rounding-control scalar conjugate complex FMA: mask 0 zeroes
    // lanes 0-1; upper lanes pass through from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fcmadd_round_sch() {
        let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
        let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
        let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_setr_ph(0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            1, a, b, c,
        );
        let e = _mm_setr_ph(2.0, 3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        assert_eq_m128h(r, e);
    }
20201
    // Element-wise f16 FMA: 1*2 + 3 = 5 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_fmadd_ph(a, b, c);
        let e = _mm_set1_ph(5.0);
        assert_eq_m128h(r, e);
    }
20211
    // Masked element-wise FMA: selected lanes get 5.0; masked-off lanes keep `a` (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask_fmadd_ph(a, 0b01010101, b, c);
        let e = _mm_set_ph(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
        assert_eq_m128h(r, e);
    }
20221
    // mask3 element-wise FMA: masked-off lanes keep `c` (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask3_fmadd_ph(a, b, c, 0b01010101);
        let e = _mm_set_ph(3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0);
        assert_eq_m128h(r, e);
    }
20231
    // Zero-masked element-wise FMA: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_maskz_fmadd_ph(0b01010101, a, b, c);
        let e = _mm_set_ph(0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0);
        assert_eq_m128h(r, e);
    }
20241
    // 256-bit element-wise f16 FMA: 1*2 + 3 = 5 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fmadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_fmadd_ph(a, b, c);
        let e = _mm256_set1_ph(5.0);
        assert_eq_m256h(r, e);
    }
20251
    // Masked 256-bit element-wise FMA: masked-off lanes keep `a` (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fmadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask_fmadd_ph(a, 0b0101010101010101, b, c);
        let e = _mm256_set_ph(
            1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
20263
    // mask3 256-bit element-wise FMA: masked-off lanes keep `c` (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fmadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask3_fmadd_ph(a, b, c, 0b0101010101010101);
        let e = _mm256_set_ph(
            3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
20275
    // Zero-masked 256-bit element-wise FMA: masked-off lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fmadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_maskz_fmadd_ph(0b0101010101010101, a, b, c);
        let e = _mm256_set_ph(
            0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
20287
    // 512-bit element-wise f16 FMA: 1*2 + 3 = 5 in every lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_fmadd_ph(a, b, c);
        let e = _mm512_set1_ph(5.0);
        assert_eq_m512h(r, e);
    }
20297
    // Masked 512-bit element-wise FMA (32-bit lane mask): masked-off lanes keep `a`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fmadd_ph(a, 0b01010101010101010101010101010101, b, c);
        let e = _mm512_set_ph(
            1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
            5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
20310
    // mask3 512-bit element-wise FMA: masked-off lanes keep `c` (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fmadd_ph(a, b, c, 0b01010101010101010101010101010101);
        let e = _mm512_set_ph(
            3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
            5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
20323
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // maskz variant: lanes with a set mask bit get a * b + c = 5.0, clear bits are zeroed.
        let r = _mm512_maskz_fmadd_ph(0b01010101010101010101010101010101, a, b, c);
        let e = _mm512_set_ph(
            0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
            5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
20336
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // Round-to-nearest-even with exceptions suppressed (SAE); result is exact anyway: 1 * 2 + 3 = 5.
        let r = _mm512_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ph(5.0);
        assert_eq_m512h(r, e);
    }
20346
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // Lanes with a set mask bit get a * b + c = 5.0, clear bits keep a = 1.0.
        let r = _mm512_mask_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b01010101010101010101010101010101,
            b,
            c,
        );
        let e = _mm512_set_ph(
            1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0,
            5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
20364
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // mask3 variant: lanes with a set mask bit get a * b + c = 5.0, clear bits keep c = 3.0.
        let r = _mm512_mask3_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b01010101010101010101010101010101,
        );
        let e = _mm512_set_ph(
            3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0,
            5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0, 3.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
20382
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // maskz variant: lanes with a set mask bit get a * b + c = 5.0, clear bits are zeroed.
        let r = _mm512_maskz_fmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
            c,
        );
        let e = _mm512_set_ph(
            0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0,
            5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0, 0.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
20400
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmadd_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // Scalar variant: element 0 gets a[0] * b[0] + c[0] = 5.0; upper elements are copied from a.
        let r = _mm_fmadd_sh(a, b, c);
        let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20410
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmadd_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask bit 0 clear: element 0 keeps a[0] = 1.0; upper elements always come from a.
        let r = _mm_mask_fmadd_sh(a, 0, b, c);
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] + c[0] = 5.0.
        let r = _mm_mask_fmadd_sh(a, 1, b, c);
        let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20423
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmadd_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask3 variant: mask bit 0 clear keeps c[0] = 3.0; upper elements always come from c.
        let r = _mm_mask3_fmadd_sh(a, b, c, 0);
        let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] + c[0] = 5.0.
        let r = _mm_mask3_fmadd_sh(a, b, c, 1);
        let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
    }
20436
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmadd_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // maskz variant: mask bit 0 clear zeroes element 0; upper elements always come from a.
        let r = _mm_maskz_fmadd_sh(0, a, b, c);
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] + c[0] = 5.0.
        let r = _mm_maskz_fmadd_sh(1, a, b, c);
        let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20449
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmadd_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // Round-to-nearest-even with SAE; element 0 gets a[0] * b[0] + c[0] = 5.0, upper from a.
        let r = _mm_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20459
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmadd_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask bit 0 clear: element 0 keeps a[0] = 1.0.
        let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] + c[0] = 5.0.
        let r = _mm_mask_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 1, b, c,
        );
        let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20476
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmadd_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask3 variant: mask bit 0 clear keeps c[0] = 3.0; upper elements always come from c.
        let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] + c[0] = 5.0.
        let r = _mm_mask3_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 1,
        );
        let e = _mm_setr_ph(5.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
    }
20493
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmadd_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // maskz variant: mask bit 0 clear zeroes element 0; upper elements always come from a.
        let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] + c[0] = 5.0.
        let r = _mm_maskz_fmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            1, a, b, c,
        );
        let e = _mm_setr_ph(5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20510
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        // Every lane computes a * b - c = 1 * 2 - 3 = -1.
        let r = _mm_fmsub_ph(a, b, c);
        let e = _mm_set1_ph(-1.0);
        assert_eq_m128h(r, e);
    }
20520
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        // Lanes with a set mask bit get a * b - c = -1.0, clear bits keep a = 1.0.
        // _mm_set_ph lists lanes high-to-low, so the last value is lane 0 (mask bit 0).
        let r = _mm_mask_fmsub_ph(a, 0b01010101, b, c);
        let e = _mm_set_ph(1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0);
        assert_eq_m128h(r, e);
    }
20530
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        // mask3 variant: lanes with a set mask bit get a * b - c = -1.0, clear bits keep c = 3.0.
        let r = _mm_mask3_fmsub_ph(a, b, c, 0b01010101);
        let e = _mm_set_ph(3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0);
        assert_eq_m128h(r, e);
    }
20540
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        // maskz variant: lanes with a set mask bit get a * b - c = -1.0, clear bits are zeroed.
        let r = _mm_maskz_fmsub_ph(0b01010101, a, b, c);
        let e = _mm_set_ph(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0);
        assert_eq_m128h(r, e);
    }
20550
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        // Every lane computes a * b - c = 1 * 2 - 3 = -1.
        let r = _mm256_fmsub_ph(a, b, c);
        let e = _mm256_set1_ph(-1.0);
        assert_eq_m256h(r, e);
    }
20560
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        // Lanes with a set mask bit get a * b - c = -1.0, clear bits keep a = 1.0.
        let r = _mm256_mask_fmsub_ph(a, 0b0101010101010101, b, c);
        let e = _mm256_set_ph(
            1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
20572
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        // mask3 variant: lanes with a set mask bit get a * b - c = -1.0, clear bits keep c = 3.0.
        let r = _mm256_mask3_fmsub_ph(a, b, c, 0b0101010101010101);
        let e = _mm256_set_ph(
            3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
20584
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        // maskz variant: lanes with a set mask bit get a * b - c = -1.0, clear bits are zeroed.
        let r = _mm256_maskz_fmsub_ph(0b0101010101010101, a, b, c);
        let e = _mm256_set_ph(
            0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
20596
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // Every lane computes a * b - c = 1 * 2 - 3 = -1.
        let r = _mm512_fmsub_ph(a, b, c);
        let e = _mm512_set1_ph(-1.0);
        assert_eq_m512h(r, e);
    }
20606
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // Lanes with a set mask bit get a * b - c = -1.0, clear bits keep a = 1.0.
        let r = _mm512_mask_fmsub_ph(a, 0b01010101010101010101010101010101, b, c);
        let e = _mm512_set_ph(
            1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
            1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
20619
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // mask3 variant: lanes with a set mask bit get a * b - c = -1.0, clear bits keep c = 3.0.
        let r = _mm512_mask3_fmsub_ph(a, b, c, 0b01010101010101010101010101010101);
        let e = _mm512_set_ph(
            3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
            3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
20632
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // maskz variant: lanes with a set mask bit get a * b - c = -1.0, clear bits are zeroed.
        let r = _mm512_maskz_fmsub_ph(0b01010101010101010101010101010101, a, b, c);
        let e = _mm512_set_ph(
            0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
            0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
20645
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // Round-to-nearest-even with SAE; result is exact anyway: 1 * 2 - 3 = -1.
        let r = _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ph(-1.0);
        assert_eq_m512h(r, e);
    }
20655
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // Lanes with a set mask bit get a * b - c = -1.0, clear bits keep a = 1.0.
        let r = _mm512_mask_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b01010101010101010101010101010101,
            b,
            c,
        );
        let e = _mm512_set_ph(
            1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
            1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
20673
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // mask3 variant: lanes with a set mask bit get a * b - c = -1.0, clear bits keep c = 3.0.
        let r = _mm512_mask3_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b01010101010101010101010101010101,
        );
        let e = _mm512_set_ph(
            3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
            3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0, 3.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
20691
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        // maskz variant: lanes with a set mask bit get a * b - c = -1.0, clear bits are zeroed.
        let r = _mm512_maskz_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
            c,
        );
        let e = _mm512_set_ph(
            0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
            0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
20709
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // Scalar variant: element 0 gets a[0] * b[0] - c[0] = -1.0; upper elements are copied from a.
        let r = _mm_fmsub_sh(a, b, c);
        let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20719
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask bit 0 clear: element 0 keeps a[0] = 1.0; upper elements always come from a.
        let r = _mm_mask_fmsub_sh(a, 0, b, c);
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] - c[0] = -1.0.
        let r = _mm_mask_fmsub_sh(a, 1, b, c);
        let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20732
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask3 variant: mask bit 0 clear keeps c[0] = 3.0; upper elements always come from c.
        let r = _mm_mask3_fmsub_sh(a, b, c, 0);
        let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] - c[0] = -1.0.
        let r = _mm_mask3_fmsub_sh(a, b, c, 1);
        let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
    }
20745
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // maskz variant: mask bit 0 clear zeroes element 0; upper elements always come from a.
        let r = _mm_maskz_fmsub_sh(0, a, b, c);
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] - c[0] = -1.0.
        let r = _mm_maskz_fmsub_sh(1, a, b, c);
        let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20758
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // Round-to-nearest-even with SAE; element 0 gets a[0] * b[0] - c[0] = -1.0, upper from a.
        let r = _mm_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20768
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask bit 0 clear: element 0 keeps a[0] = 1.0.
        let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] - c[0] = -1.0.
        let r = _mm_mask_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 1, b, c,
        );
        let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20785
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // mask3 variant: mask bit 0 clear keeps c[0] = 3.0; upper elements always come from c.
        let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] - c[0] = -1.0.
        let r = _mm_mask3_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 1,
        );
        let e = _mm_setr_ph(-1.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
    }
20802
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        // maskz variant: mask bit 0 clear zeroes element 0; upper elements always come from a.
        let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        // mask bit 0 set: element 0 gets a[0] * b[0] - c[0] = -1.0.
        let r = _mm_maskz_fmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            1, a, b, c,
        );
        let e = _mm_setr_ph(-1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
20819
20820 #[simd_test(enable = "avx512fp16,avx512vl")]
20821 unsafe fn test_mm_fnmadd_ph() {
20822 let a = _mm_set1_ph(1.0);
20823 let b = _mm_set1_ph(2.0);
20824 let c = _mm_set1_ph(3.0);
20825 let r = _mm_fnmadd_ph(a, b, c);
20826 let e = _mm_set1_ph(1.0);
20827 assert_eq_m128h(r, e);
20828 }
20829
20830 #[simd_test(enable = "avx512fp16,avx512vl")]
20831 unsafe fn test_mm_mask_fnmadd_ph() {
20832 let a = _mm_set1_ph(1.0);
20833 let b = _mm_set1_ph(2.0);
20834 let c = _mm_set1_ph(3.0);
20835 let r = _mm_mask_fnmadd_ph(a, 0b01010101, b, c);
20836 let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
20837 assert_eq_m128h(r, e);
20838 }
20839
20840 #[simd_test(enable = "avx512fp16,avx512vl")]
20841 unsafe fn test_mm_mask3_fnmadd_ph() {
20842 let a = _mm_set1_ph(1.0);
20843 let b = _mm_set1_ph(2.0);
20844 let c = _mm_set1_ph(3.0);
20845 let r = _mm_mask3_fnmadd_ph(a, b, c, 0b01010101);
20846 let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
20847 assert_eq_m128h(r, e);
20848 }
20849
20850 #[simd_test(enable = "avx512fp16,avx512vl")]
20851 unsafe fn test_mm_maskz_fnmadd_ph() {
20852 let a = _mm_set1_ph(1.0);
20853 let b = _mm_set1_ph(2.0);
20854 let c = _mm_set1_ph(3.0);
20855 let r = _mm_maskz_fnmadd_ph(0b01010101, a, b, c);
20856 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
20857 assert_eq_m128h(r, e);
20858 }
20859
20860 #[simd_test(enable = "avx512fp16,avx512vl")]
20861 unsafe fn test_mm256_fnmadd_ph() {
20862 let a = _mm256_set1_ph(1.0);
20863 let b = _mm256_set1_ph(2.0);
20864 let c = _mm256_set1_ph(3.0);
20865 let r = _mm256_fnmadd_ph(a, b, c);
20866 let e = _mm256_set1_ph(1.0);
20867 assert_eq_m256h(r, e);
20868 }
20869
20870 #[simd_test(enable = "avx512fp16,avx512vl")]
20871 unsafe fn test_mm256_mask_fnmadd_ph() {
20872 let a = _mm256_set1_ph(1.0);
20873 let b = _mm256_set1_ph(2.0);
20874 let c = _mm256_set1_ph(3.0);
20875 let r = _mm256_mask_fnmadd_ph(a, 0b0101010101010101, b, c);
20876 let e = _mm256_set_ph(
20877 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20878 );
20879 assert_eq_m256h(r, e);
20880 }
20881
20882 #[simd_test(enable = "avx512fp16,avx512vl")]
20883 unsafe fn test_mm256_mask3_fnmadd_ph() {
20884 let a = _mm256_set1_ph(1.0);
20885 let b = _mm256_set1_ph(2.0);
20886 let c = _mm256_set1_ph(3.0);
20887 let r = _mm256_mask3_fnmadd_ph(a, b, c, 0b0101010101010101);
20888 let e = _mm256_set_ph(
20889 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20890 );
20891 assert_eq_m256h(r, e);
20892 }
20893
20894 #[simd_test(enable = "avx512fp16,avx512vl")]
20895 unsafe fn test_mm256_maskz_fnmadd_ph() {
20896 let a = _mm256_set1_ph(1.0);
20897 let b = _mm256_set1_ph(2.0);
20898 let c = _mm256_set1_ph(3.0);
20899 let r = _mm256_maskz_fnmadd_ph(0b0101010101010101, a, b, c);
20900 let e = _mm256_set_ph(
20901 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20902 );
20903 assert_eq_m256h(r, e);
20904 }
20905
20906 #[simd_test(enable = "avx512fp16")]
20907 unsafe fn test_mm512_fnmadd_ph() {
20908 let a = _mm512_set1_ph(1.0);
20909 let b = _mm512_set1_ph(2.0);
20910 let c = _mm512_set1_ph(3.0);
20911 let r = _mm512_fnmadd_ph(a, b, c);
20912 let e = _mm512_set1_ph(1.0);
20913 assert_eq_m512h(r, e);
20914 }
20915
20916 #[simd_test(enable = "avx512fp16")]
20917 unsafe fn test_mm512_mask_fnmadd_ph() {
20918 let a = _mm512_set1_ph(1.0);
20919 let b = _mm512_set1_ph(2.0);
20920 let c = _mm512_set1_ph(3.0);
20921 let r = _mm512_mask_fnmadd_ph(a, 0b01010101010101010101010101010101, b, c);
20922 let e = _mm512_set_ph(
20923 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20924 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20925 );
20926 assert_eq_m512h(r, e);
20927 }
20928
20929 #[simd_test(enable = "avx512fp16")]
20930 unsafe fn test_mm512_mask3_fnmadd_ph() {
20931 let a = _mm512_set1_ph(1.0);
20932 let b = _mm512_set1_ph(2.0);
20933 let c = _mm512_set1_ph(3.0);
20934 let r = _mm512_mask3_fnmadd_ph(a, b, c, 0b01010101010101010101010101010101);
20935 let e = _mm512_set_ph(
20936 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
20937 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20938 );
20939 assert_eq_m512h(r, e);
20940 }
20941
20942 #[simd_test(enable = "avx512fp16")]
20943 unsafe fn test_mm512_maskz_fnmadd_ph() {
20944 let a = _mm512_set1_ph(1.0);
20945 let b = _mm512_set1_ph(2.0);
20946 let c = _mm512_set1_ph(3.0);
20947 let r = _mm512_maskz_fnmadd_ph(0b01010101010101010101010101010101, a, b, c);
20948 let e = _mm512_set_ph(
20949 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
20950 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
20951 );
20952 assert_eq_m512h(r, e);
20953 }
20954
20955 #[simd_test(enable = "avx512fp16")]
20956 unsafe fn test_mm512_fnmadd_round_ph() {
20957 let a = _mm512_set1_ph(1.0);
20958 let b = _mm512_set1_ph(2.0);
20959 let c = _mm512_set1_ph(3.0);
20960 let r =
20961 _mm512_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
20962 let e = _mm512_set1_ph(1.0);
20963 assert_eq_m512h(r, e);
20964 }
20965
20966 #[simd_test(enable = "avx512fp16")]
20967 unsafe fn test_mm512_mask_fnmadd_round_ph() {
20968 let a = _mm512_set1_ph(1.0);
20969 let b = _mm512_set1_ph(2.0);
20970 let c = _mm512_set1_ph(3.0);
20971 let r = _mm512_mask_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20972 a,
20973 0b01010101010101010101010101010101,
20974 b,
20975 c,
20976 );
20977 let e = _mm512_set_ph(
20978 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20979 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
20980 );
20981 assert_eq_m512h(r, e);
20982 }
20983
20984 #[simd_test(enable = "avx512fp16")]
20985 unsafe fn test_mm512_mask3_fnmadd_round_ph() {
20986 let a = _mm512_set1_ph(1.0);
20987 let b = _mm512_set1_ph(2.0);
20988 let c = _mm512_set1_ph(3.0);
20989 let r = _mm512_mask3_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
20990 a,
20991 b,
20992 c,
20993 0b01010101010101010101010101010101,
20994 );
20995 let e = _mm512_set_ph(
20996 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
20997 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
20998 );
20999 assert_eq_m512h(r, e);
21000 }
21001
21002 #[simd_test(enable = "avx512fp16")]
21003 unsafe fn test_mm512_maskz_fnmadd_round_ph() {
21004 let a = _mm512_set1_ph(1.0);
21005 let b = _mm512_set1_ph(2.0);
21006 let c = _mm512_set1_ph(3.0);
21007 let r = _mm512_maskz_fnmadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21008 0b01010101010101010101010101010101,
21009 a,
21010 b,
21011 c,
21012 );
21013 let e = _mm512_set_ph(
21014 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
21015 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
21016 );
21017 assert_eq_m512h(r, e);
21018 }
21019
21020 #[simd_test(enable = "avx512fp16,avx512vl")]
21021 unsafe fn test_mm_fnmadd_sh() {
21022 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21023 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21024 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21025 let r = _mm_fnmadd_sh(a, b, c);
21026 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21027 assert_eq_m128h(r, e);
21028 }
21029
21030 #[simd_test(enable = "avx512fp16,avx512vl")]
21031 unsafe fn test_mm_mask_fnmadd_sh() {
21032 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21033 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21034 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21035 let r = _mm_mask_fnmadd_sh(a, 0, b, c);
21036 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21037 assert_eq_m128h(r, e);
21038 let r = _mm_mask_fnmadd_sh(a, 1, b, c);
21039 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21040 assert_eq_m128h(r, e);
21041 }
21042
21043 #[simd_test(enable = "avx512fp16,avx512vl")]
21044 unsafe fn test_mm_mask3_fnmadd_sh() {
21045 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21046 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21047 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21048 let r = _mm_mask3_fnmadd_sh(a, b, c, 0);
21049 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21050 assert_eq_m128h(r, e);
21051 let r = _mm_mask3_fnmadd_sh(a, b, c, 1);
21052 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
21053 assert_eq_m128h(r, e);
21054 }
21055
21056 #[simd_test(enable = "avx512fp16,avx512vl")]
21057 unsafe fn test_mm_maskz_fnmadd_sh() {
21058 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21059 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21060 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21061 let r = _mm_maskz_fnmadd_sh(0, a, b, c);
21062 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
21063 assert_eq_m128h(r, e);
21064 let r = _mm_maskz_fnmadd_sh(1, a, b, c);
21065 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21066 assert_eq_m128h(r, e);
21067 }
21068
21069 #[simd_test(enable = "avx512fp16,avx512vl")]
21070 unsafe fn test_mm_fnmadd_round_sh() {
21071 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21072 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21073 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21074 let r = _mm_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21075 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21076 assert_eq_m128h(r, e);
21077 }
21078
21079 #[simd_test(enable = "avx512fp16,avx512vl")]
21080 unsafe fn test_mm_mask_fnmadd_round_sh() {
21081 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21082 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21083 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21084 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21085 a, 0, b, c,
21086 );
21087 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21088 assert_eq_m128h(r, e);
21089 let r = _mm_mask_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21090 a, 1, b, c,
21091 );
21092 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21093 assert_eq_m128h(r, e);
21094 }
21095
21096 #[simd_test(enable = "avx512fp16,avx512vl")]
21097 unsafe fn test_mm_mask3_fnmadd_round_sh() {
21098 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21099 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21100 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21101 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21102 a, b, c, 0,
21103 );
21104 let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21105 assert_eq_m128h(r, e);
21106 let r = _mm_mask3_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
21107 a, b, c, 1,
21108 );
21109 let e = _mm_setr_ph(1.0, 30., 31., 32., 33., 34., 35., 36.);
21110 assert_eq_m128h(r, e);
21111 }
21112
    // Scalar FNMADD with zero-masking: mask bit 0 clear zeroes lane 0; set computes
    // -(1*2) + 3 = 1.0. Lanes 1..7 are always copied from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fnmadd_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fnmadd_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            1, a, b, c,
        );
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
21129
21130 #[simd_test(enable = "avx512fp16,avx512vl")]
21131 unsafe fn test_mm_fnmsub_ph() {
21132 let a = _mm_set1_ph(1.0);
21133 let b = _mm_set1_ph(2.0);
21134 let c = _mm_set1_ph(3.0);
21135 let r = _mm_fnmsub_ph(a, b, c);
21136 let e = _mm_set1_ph(-5.0);
21137 assert_eq_m128h(r, e);
21138 }
21139
    // Packed FNMSUB (-(a*b) - c = -5) merged with a: mask 0b01010101 computes the
    // even lanes and keeps a (1.0) in odd lanes. _mm_set_ph lists lanes high-to-low,
    // so the trailing -5.0 is lane 0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fnmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask_fnmsub_ph(a, 0b01010101, b, c);
        let e = _mm_set_ph(1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0);
        assert_eq_m128h(r, e);
    }
21149
    // Packed FNMSUB merged into c: even lanes (mask set) get -(1*2) - 3 = -5,
    // odd lanes keep c (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fnmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask3_fnmsub_ph(a, b, c, 0b01010101);
        let e = _mm_set_ph(3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0);
        assert_eq_m128h(r, e);
    }
21159
    // Packed FNMSUB with zero-masking: even lanes (mask set) get -5, odd lanes
    // are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fnmsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_maskz_fnmsub_ph(0b01010101, a, b, c);
        let e = _mm_set_ph(0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0);
        assert_eq_m128h(r, e);
    }
21169
21170 #[simd_test(enable = "avx512fp16,avx512vl")]
21171 unsafe fn test_mm256_fnmsub_ph() {
21172 let a = _mm256_set1_ph(1.0);
21173 let b = _mm256_set1_ph(2.0);
21174 let c = _mm256_set1_ph(3.0);
21175 let r = _mm256_fnmsub_ph(a, b, c);
21176 let e = _mm256_set1_ph(-5.0);
21177 assert_eq_m256h(r, e);
21178 }
21179
    // 16-lane FNMSUB merged with a: even lanes (mask set) get -5, odd lanes keep
    // a (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fnmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask_fnmsub_ph(a, 0b0101010101010101, b, c);
        let e = _mm256_set_ph(
            1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
        );
        assert_eq_m256h(r, e);
    }
21191
    // 16-lane FNMSUB merged into c: even lanes (mask set) get -5, odd lanes keep
    // c (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fnmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask3_fnmsub_ph(a, b, c, 0b0101010101010101);
        let e = _mm256_set_ph(
            3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
        );
        assert_eq_m256h(r, e);
    }
21203
    // 16-lane FNMSUB with zero-masking: even lanes get -5, odd lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fnmsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_maskz_fnmsub_ph(0b0101010101010101, a, b, c);
        let e = _mm256_set_ph(
            0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
        );
        assert_eq_m256h(r, e);
    }
21215
21216 #[simd_test(enable = "avx512fp16")]
21217 unsafe fn test_mm512_fnmsub_ph() {
21218 let a = _mm512_set1_ph(1.0);
21219 let b = _mm512_set1_ph(2.0);
21220 let c = _mm512_set1_ph(3.0);
21221 let r = _mm512_fnmsub_ph(a, b, c);
21222 let e = _mm512_set1_ph(-5.0);
21223 assert_eq_m512h(r, e);
21224 }
21225
    // 32-lane FNMSUB merged with a: even lanes (mask set) get -(1*2) - 3 = -5,
    // odd lanes keep a (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fnmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fnmsub_ph(a, 0b01010101010101010101010101010101, b, c);
        let e = _mm512_set_ph(
            1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
            1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
        );
        assert_eq_m512h(r, e);
    }
21238
    // 32-lane FNMSUB merged into c: even lanes (mask set) get -5, odd lanes keep
    // c (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fnmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fnmsub_ph(a, b, c, 0b01010101010101010101010101010101);
        let e = _mm512_set_ph(
            3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
            3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
        );
        assert_eq_m512h(r, e);
    }
21251
    // 32-lane FNMSUB with zero-masking: even lanes get -5, odd lanes are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fnmsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_fnmsub_ph(0b01010101010101010101010101010101, a, b, c);
        let e = _mm512_set_ph(
            0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
            0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
        );
        assert_eq_m512h(r, e);
    }
21264
21265 #[simd_test(enable = "avx512fp16")]
21266 unsafe fn test_mm512_fnmsub_round_ph() {
21267 let a = _mm512_set1_ph(1.0);
21268 let b = _mm512_set1_ph(2.0);
21269 let c = _mm512_set1_ph(3.0);
21270 let r =
21271 _mm512_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
21272 let e = _mm512_set1_ph(-5.0);
21273 assert_eq_m512h(r, e);
21274 }
21275
    // Rounded 32-lane FNMSUB merged with a: even lanes (mask set) get -5, odd
    // lanes keep a (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fnmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b01010101010101010101010101010101,
            b,
            c,
        );
        let e = _mm512_set_ph(
            1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
            1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0, 1.0, -5.0,
        );
        assert_eq_m512h(r, e);
    }
21293
    // Rounded 32-lane FNMSUB merged into c: even lanes (mask set) get -5, odd
    // lanes keep c (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fnmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b01010101010101010101010101010101,
        );
        let e = _mm512_set_ph(
            3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
            3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0, 3.0, -5.0,
        );
        assert_eq_m512h(r, e);
    }
21311
    // Rounded 32-lane FNMSUB with zero-masking: even lanes get -5, odd lanes are
    // zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fnmsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_fnmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
            c,
        );
        let e = _mm512_set_ph(
            0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
            0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0, 0.0, -5.0,
        );
        assert_eq_m512h(r, e);
    }
21329
21330 #[simd_test(enable = "avx512fp16,avx512vl")]
21331 unsafe fn test_mm_fnmsub_sh() {
21332 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
21333 let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
21334 let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
21335 let r = _mm_fnmsub_sh(a, b, c);
21336 let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
21337 assert_eq_m128h(r, e);
21338 }
21339
    // Scalar FNMSUB merged with a: mask bit 0 clear keeps a[0] = 1.0; set computes
    // -(1*2) - 3 = -5. Lanes 1..7 are always copied from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fnmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_mask_fnmsub_sh(a, 0, b, c);
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r = _mm_mask_fnmsub_sh(a, 1, b, c);
        let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
21352
    // Scalar FNMSUB merged into c: mask bit 0 clear keeps c[0] = 3.0; set computes
    // -5. Lanes 1..7 are always copied from c for the mask3 form.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fnmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_mask3_fnmsub_sh(a, b, c, 0);
        let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
        let r = _mm_mask3_fnmsub_sh(a, b, c, 1);
        let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
    }
21365
    // Scalar FNMSUB with zero-masking: mask bit 0 clear zeroes lane 0; set computes
    // -5. Lanes 1..7 are always copied from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fnmsub_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_maskz_fnmsub_sh(0, a, b, c);
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fnmsub_sh(1, a, b, c);
        let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
21378
    // Rounded scalar FNMSUB: lane 0 is -(1*2) - 3 = -5 (exact, so the rounding mode
    // has no visible effect); lanes 1..7 pass through from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fnmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
21388
    // Rounded scalar FNMSUB merged with a: mask bit 0 clear keeps a[0] = 1.0; set
    // computes -5. Lanes 1..7 are always copied from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fnmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r = _mm_mask_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 1, b, c,
        );
        let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
21405
    // Rounded scalar FNMSUB merged into c: mask bit 0 clear keeps c[0] = 3.0; set
    // computes -5. Lanes 1..7 are always copied from c.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fnmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        let e = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
        let r = _mm_mask3_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 1,
        );
        let e = _mm_setr_ph(-5.0, 30., 31., 32., 33., 34., 35., 36.);
        assert_eq_m128h(r, e);
    }
21422
    // Rounded scalar FNMSUB with zero-masking: mask bit 0 clear zeroes lane 0; set
    // computes -5. Lanes 1..7 are always copied from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fnmsub_round_sh() {
        let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
        let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_fnmsub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            1, a, b, c,
        );
        let e = _mm_setr_ph(-5.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
21439
21440 #[simd_test(enable = "avx512fp16,avx512vl")]
21441 unsafe fn test_mm_fmaddsub_ph() {
21442 let a = _mm_set1_ph(1.0);
21443 let b = _mm_set1_ph(2.0);
21444 let c = _mm_set1_ph(3.0);
21445 let r = _mm_fmaddsub_ph(a, b, c);
21446 let e = _mm_set_ph(5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0);
21447 assert_eq_m128h(r, e);
21448 }
21449
    // FMADDSUB merged with a: mask 0b00110011 computes lanes 0,1,4,5 (even lanes
    // subtract → -1, odd lanes add → 5); lanes 2,3,6,7 keep a (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmaddsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask_fmaddsub_ph(a, 0b00110011, b, c);
        let e = _mm_set_ph(1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0);
        assert_eq_m128h(r, e);
    }
21459
    // FMADDSUB merged into c: masked lanes 0,1,4,5 computed (-1/5 alternating);
    // lanes 2,3,6,7 keep c (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmaddsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask3_fmaddsub_ph(a, b, c, 0b00110011);
        let e = _mm_set_ph(3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0);
        assert_eq_m128h(r, e);
    }
21469
    // FMADDSUB with zero-masking: lanes 0,1,4,5 computed (-1/5 alternating);
    // lanes 2,3,6,7 zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmaddsub_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_maskz_fmaddsub_ph(0b00110011, a, b, c);
        let e = _mm_set_ph(0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0);
        assert_eq_m128h(r, e);
    }
21479
    // 16-lane FMADDSUB: even lanes 1*2 - 3 = -1, odd lanes 1*2 + 3 = 5.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fmaddsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_fmaddsub_ph(a, b, c);
        let e = _mm256_set_ph(
            5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
21491
    // 16-lane FMADDSUB merged with a: mask 0b0011... computes lane pairs 0,1 of
    // each nibble (-1/5); the other lanes keep a (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fmaddsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask_fmaddsub_ph(a, 0b0011001100110011, b, c);
        let e = _mm256_set_ph(
            1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
21503
    // 16-lane FMADDSUB merged into c: masked lanes computed (-1/5); unmasked lanes
    // keep c (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fmaddsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask3_fmaddsub_ph(a, b, c, 0b0011001100110011);
        let e = _mm256_set_ph(
            3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
21515
    // 16-lane FMADDSUB with zero-masking: masked lanes computed (-1/5); unmasked
    // lanes zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fmaddsub_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_maskz_fmaddsub_ph(0b0011001100110011, a, b, c);
        let e = _mm256_set_ph(
            0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
        );
        assert_eq_m256h(r, e);
    }
21527
    // 32-lane FMADDSUB: even lanes 1*2 - 3 = -1, odd lanes 1*2 + 3 = 5.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmaddsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_fmaddsub_ph(a, b, c);
        let e = _mm512_set_ph(
            5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
            5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21540
    // 32-lane FMADDSUB merged with a: masked lanes computed (-1/5 alternating);
    // unmasked lanes keep a (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmaddsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fmaddsub_ph(a, 0b00110011001100110011001100110011, b, c);
        let e = _mm512_set_ph(
            1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
            1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21553
    // 32-lane FMADDSUB merged into c: masked lanes computed (-1/5); unmasked lanes
    // keep c (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmaddsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fmaddsub_ph(a, b, c, 0b00110011001100110011001100110011);
        let e = _mm512_set_ph(
            3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
            3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21566
    // 32-lane FMADDSUB with zero-masking: masked lanes computed (-1/5); unmasked
    // lanes zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmaddsub_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_fmaddsub_ph(0b00110011001100110011001100110011, a, b, c);
        let e = _mm512_set_ph(
            0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
            0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21579
    // Rounded 32-lane FMADDSUB: even lanes -1, odd lanes 5 (exact results, so the
    // rounding mode has no visible effect).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmaddsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r =
            _mm512_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set_ph(
            5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
            5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21593
    // Rounded 32-lane FMADDSUB merged with a: masked lanes computed (-1/5);
    // unmasked lanes keep a (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmaddsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00110011001100110011001100110011,
            b,
            c,
        );
        let e = _mm512_set_ph(
            1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
            1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0, 1.0, 1.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21611
    // Rounded 32-lane FMADDSUB merged into c: masked lanes computed (-1/5);
    // unmasked lanes keep c (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00110011001100110011001100110011,
        );
        let e = _mm512_set_ph(
            3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
            3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0, 3.0, 3.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21629
    // Rounded 32-lane FMADDSUB with zero-masking: masked lanes computed (-1/5);
    // unmasked lanes zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_fmaddsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00110011001100110011001100110011,
            a,
            b,
            c,
        );
        let e = _mm512_set_ph(
            0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
            0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0, 0.0, 0.0, 5.0, -1.0,
        );
        assert_eq_m512h(r, e);
    }
21647
21648 #[simd_test(enable = "avx512fp16,avx512vl")]
21649 unsafe fn test_mm_fmsubadd_ph() {
21650 let a = _mm_set1_ph(1.0);
21651 let b = _mm_set1_ph(2.0);
21652 let c = _mm_set1_ph(3.0);
21653 let r = _mm_fmsubadd_ph(a, b, c);
21654 let e = _mm_set_ph(-1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0);
21655 assert_eq_m128h(r, e);
21656 }
21657
    // FMSUBADD merged with a: masked lanes 0,1,4,5 computed (even add → 5, odd
    // subtract → -1); lanes 2,3,6,7 keep a (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fmsubadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask_fmsubadd_ph(a, 0b00110011, b, c);
        let e = _mm_set_ph(1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0);
        assert_eq_m128h(r, e);
    }
21667
    // FMSUBADD merged into c: masked lanes computed (5/-1); unmasked lanes keep
    // c (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask3_fmsubadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_mask3_fmsubadd_ph(a, b, c, 0b00110011);
        let e = _mm_set_ph(3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0);
        assert_eq_m128h(r, e);
    }
21677
    // FMSUBADD with zero-masking: masked lanes computed (5/-1); unmasked lanes
    // zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_fmsubadd_ph() {
        let a = _mm_set1_ph(1.0);
        let b = _mm_set1_ph(2.0);
        let c = _mm_set1_ph(3.0);
        let r = _mm_maskz_fmsubadd_ph(0b00110011, a, b, c);
        let e = _mm_set_ph(0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0);
        assert_eq_m128h(r, e);
    }
21687
    // 16-lane FMSUBADD: even lanes 1*2 + 3 = 5, odd lanes 1*2 - 3 = -1.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fmsubadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_fmsubadd_ph(a, b, c);
        let e = _mm256_set_ph(
            -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
21699
    // 16-lane FMSUBADD merged with a: masked lanes computed (5/-1); unmasked lanes
    // keep a (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fmsubadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask_fmsubadd_ph(a, 0b0011001100110011, b, c);
        let e = _mm256_set_ph(
            1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
21711
    // 16-lane FMSUBADD merged into c: masked lanes computed (5/-1); unmasked lanes
    // keep c (3.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask3_fmsubadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_mask3_fmsubadd_ph(a, b, c, 0b0011001100110011);
        let e = _mm256_set_ph(
            3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
21723
    // 16-lane FMSUBADD with zero-masking: masked lanes computed (5/-1); unmasked
    // lanes zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_fmsubadd_ph() {
        let a = _mm256_set1_ph(1.0);
        let b = _mm256_set1_ph(2.0);
        let c = _mm256_set1_ph(3.0);
        let r = _mm256_maskz_fmsubadd_ph(0b0011001100110011, a, b, c);
        let e = _mm256_set_ph(
            0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
        );
        assert_eq_m256h(r, e);
    }
21735
    // 32-lane FMSUBADD: even lanes 1*2 + 3 = 5, odd lanes 1*2 - 3 = -1.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmsubadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_fmsubadd_ph(a, b, c);
        let e = _mm512_set_ph(
            -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
            -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21748
    // 32-lane FMSUBADD merged with a: masked lanes computed (5/-1); unmasked lanes
    // keep a (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmsubadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fmsubadd_ph(a, 0b00110011001100110011001100110011, b, c);
        let e = _mm512_set_ph(
            1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
            1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21761
    // 32-lane FMSUBADD merged into c: masked lanes computed (5/-1); unmasked lanes
    // keep c (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmsubadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fmsubadd_ph(a, b, c, 0b00110011001100110011001100110011);
        let e = _mm512_set_ph(
            3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
            3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21774
    // 32-lane FMSUBADD with zero-masking: masked lanes computed (5/-1); unmasked
    // lanes zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmsubadd_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_fmsubadd_ph(0b00110011001100110011001100110011, a, b, c);
        let e = _mm512_set_ph(
            0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
            0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21787
    // Rounded 32-lane FMSUBADD: even lanes 5, odd lanes -1 (exact results, so the
    // rounding mode has no visible effect).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fmsubadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r =
            _mm512_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set_ph(
            -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
            -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21801
    // Rounded 32-lane FMSUBADD merged with a: masked lanes computed (5/-1);
    // unmasked lanes keep a (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fmsubadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00110011001100110011001100110011,
            b,
            c,
        );
        let e = _mm512_set_ph(
            1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
            1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0, 1.0, 1.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21819
    // Rounded 32-lane FMSUBADD merged into c: masked lanes computed (5/-1);
    // unmasked lanes keep c (3.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_mask3_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00110011001100110011001100110011,
        );
        let e = _mm512_set_ph(
            3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
            3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0, 3.0, 3.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21837
    // Rounded 32-lane FMSUBADD with zero-masking: masked lanes computed (5/-1);
    // unmasked lanes zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
        let a = _mm512_set1_ph(1.0);
        let b = _mm512_set1_ph(2.0);
        let c = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_fmsubadd_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00110011001100110011001100110011,
            a,
            b,
            c,
        );
        let e = _mm512_set_ph(
            0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
            0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0, 0.0, 0.0, -1.0, 5.0,
        );
        assert_eq_m512h(r, e);
    }
21855
21856 #[simd_test(enable = "avx512fp16,avx512vl")]
21857 unsafe fn test_mm_rcp_ph() {
21858 let a = _mm_set1_ph(2.0);
21859 let r = _mm_rcp_ph(a);
21860 let e = _mm_set1_ph(0.5);
21861 assert_eq_m128h(r, e);
21862 }
21863
    // Masked reciprocal merged with src: even lanes (mask set) get ~1/2 = 0.5,
    // odd lanes keep src (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_rcp_ph() {
        let a = _mm_set1_ph(2.0);
        let src = _mm_set1_ph(1.0);
        let r = _mm_mask_rcp_ph(src, 0b01010101, a);
        let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
        assert_eq_m128h(r, e);
    }
21872
    // Zero-masked reciprocal: even lanes get 0.5, odd lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_rcp_ph() {
        let a = _mm_set1_ph(2.0);
        let r = _mm_maskz_rcp_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
        assert_eq_m128h(r, e);
    }
21880
21881 #[simd_test(enable = "avx512fp16,avx512vl")]
21882 unsafe fn test_mm256_rcp_ph() {
21883 let a = _mm256_set1_ph(2.0);
21884 let r = _mm256_rcp_ph(a);
21885 let e = _mm256_set1_ph(0.5);
21886 assert_eq_m256h(r, e);
21887 }
21888
    // 16-lane masked reciprocal merged with src: even lanes 0.5, odd lanes keep
    // src (1.0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_rcp_ph() {
        let a = _mm256_set1_ph(2.0);
        let src = _mm256_set1_ph(1.0);
        let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
        );
        assert_eq_m256h(r, e);
    }
21899
    // 16-lane zero-masked reciprocal: even lanes 0.5, odd lanes zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_rcp_ph() {
        let a = _mm256_set1_ph(2.0);
        let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m256h(r, e);
    }
21909
21910 #[simd_test(enable = "avx512fp16")]
21911 unsafe fn test_mm512_rcp_ph() {
21912 let a = _mm512_set1_ph(2.0);
21913 let r = _mm512_rcp_ph(a);
21914 let e = _mm512_set1_ph(0.5);
21915 assert_eq_m512h(r, e);
21916 }
21917
    // 32-lane masked reciprocal merged with src: even lanes 0.5, odd lanes keep
    // src (1.0).
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_rcp_ph() {
        let a = _mm512_set1_ph(2.0);
        let src = _mm512_set1_ph(1.0);
        let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
            0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }
21929
    // 32-lane zero-masked reciprocal: even lanes 0.5, odd lanes zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_rcp_ph() {
        let a = _mm512_set1_ph(2.0);
        let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
            0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }
21940
21941 #[simd_test(enable = "avx512fp16,avx512vl")]
21942 unsafe fn test_mm_rcp_sh() {
21943 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21944 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21945 let r = _mm_rcp_sh(a, b);
21946 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21947 assert_eq_m128h(r, e);
21948 }
21949
    // Scalar reciprocal merged with src: mask bit 0 clear keeps src[0] = 3.0; set
    // computes ~1/b[0] = 0.5. Lanes 1..7 are always copied from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_rcp_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_rcp_sh(src, 0, a, b);
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_rcp_sh(src, 1, a, b);
        let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }
21962
21963 #[simd_test(enable = "avx512fp16,avx512vl")]
21964 unsafe fn test_mm_maskz_rcp_sh() {
21965 let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21966 let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
21967 let r = _mm_maskz_rcp_sh(0, a, b);
21968 let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21969 assert_eq_m128h(r, e);
21970 let r = _mm_maskz_rcp_sh(1, a, b);
21971 let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
21972 assert_eq_m128h(r, e);
21973 }
21974
21975 #[simd_test(enable = "avx512fp16,avx512vl")]
21976 unsafe fn test_mm_rsqrt_ph() {
21977 let a = _mm_set1_ph(4.0);
21978 let r = _mm_rsqrt_ph(a);
21979 let e = _mm_set1_ph(0.5);
21980 assert_eq_m128h(r, e);
21981 }
21982
    // Masked rsqrt (128-bit): selected lanes get 1/sqrt(4.0) = 0.5, the rest keep src.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_rsqrt_ph() {
        let a = _mm_set1_ph(4.0);
        let src = _mm_set1_ph(1.0);
        let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
        let e = _mm_set_ph(1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5);
        assert_eq_m128h(r, e);
    }

    // Zero-masked rsqrt (128-bit): unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_rsqrt_ph() {
        let a = _mm_set1_ph(4.0);
        let r = _mm_maskz_rsqrt_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
        assert_eq_m128h(r, e);
    }

    // Plain rsqrt (256-bit): 1/sqrt(4.0) = 0.5 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_rsqrt_ph() {
        let a = _mm256_set1_ph(4.0);
        let r = _mm256_rsqrt_ph(a);
        let e = _mm256_set1_ph(0.5);
        assert_eq_m256h(r, e);
    }

    // Masked rsqrt (256-bit): alternating mask keeps src in odd lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_rsqrt_ph() {
        let a = _mm256_set1_ph(4.0);
        let src = _mm256_set1_ph(1.0);
        let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
        );
        assert_eq_m256h(r, e);
    }

    // Zero-masked rsqrt (256-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_rsqrt_ph() {
        let a = _mm256_set1_ph(4.0);
        let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m256h(r, e);
    }

    // Plain rsqrt (512-bit): 1/sqrt(4.0) = 0.5 in every lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_rsqrt_ph() {
        let a = _mm512_set1_ph(4.0);
        let r = _mm512_rsqrt_ph(a);
        let e = _mm512_set1_ph(0.5);
        assert_eq_m512h(r, e);
    }

    // Masked rsqrt (512-bit): alternating mask keeps src (1.0) in odd lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_rsqrt_ph() {
        let a = _mm512_set1_ph(4.0);
        let src = _mm512_set1_ph(1.0);
        let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0,
            0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5, 1.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked rsqrt (512-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_rsqrt_ph() {
        let a = _mm512_set1_ph(4.0);
        let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0,
            0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar rsqrt: lane 0 becomes 1/sqrt(b[0]) = 0.5; lanes 1..=7 come from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_rsqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let r = _mm_rsqrt_sh(a, b);
        let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked scalar rsqrt: mask bit 0 selects between `src` (0) and the result (1).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_rsqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_rsqrt_sh(src, 0, a, b);
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_rsqrt_sh(src, 1, a, b);
        let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked scalar rsqrt: mask bit 0 clear zeroes lane 0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_rsqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let r = _mm_maskz_rsqrt_sh(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_rsqrt_sh(1, a, b);
        let e = _mm_setr_ph(0.5, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }
22093
22094 #[simd_test(enable = "avx512fp16,avx512vl")]
22095 unsafe fn test_mm_sqrt_ph() {
22096 let a = _mm_set1_ph(4.0);
22097 let r = _mm_sqrt_ph(a);
22098 let e = _mm_set1_ph(2.0);
22099 assert_eq_m128h(r, e);
22100 }
22101
    // Masked sqrt (128-bit): selected lanes get sqrt(4.0) = 2.0, the rest keep src.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_sqrt_ph() {
        let a = _mm_set1_ph(4.0);
        let src = _mm_set1_ph(1.0);
        let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
        let e = _mm_set_ph(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked sqrt (128-bit): unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_sqrt_ph() {
        let a = _mm_set1_ph(4.0);
        let r = _mm_maskz_sqrt_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
        assert_eq_m128h(r, e);
    }

    // Plain sqrt (256-bit): sqrt(4.0) = 2.0 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_sqrt_ph() {
        let a = _mm256_set1_ph(4.0);
        let r = _mm256_sqrt_ph(a);
        let e = _mm256_set1_ph(2.0);
        assert_eq_m256h(r, e);
    }

    // Masked sqrt (256-bit): alternating mask keeps src (1.0) in odd lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_sqrt_ph() {
        let a = _mm256_set1_ph(4.0);
        let src = _mm256_set1_ph(1.0);
        let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
        );
        assert_eq_m256h(r, e);
    }

    // Zero-masked sqrt (256-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_sqrt_ph() {
        let a = _mm256_set1_ph(4.0);
        let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
        );
        assert_eq_m256h(r, e);
    }

    // Plain sqrt (512-bit): sqrt(4.0) = 2.0 in every lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_sqrt_ph() {
        let a = _mm512_set1_ph(4.0);
        let r = _mm512_sqrt_ph(a);
        let e = _mm512_set1_ph(2.0);
        assert_eq_m512h(r, e);
    }

    // Masked sqrt (512-bit): alternating mask keeps src (1.0) in odd lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_sqrt_ph() {
        let a = _mm512_set1_ph(4.0);
        let src = _mm512_set1_ph(1.0);
        let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
            2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked sqrt (512-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_sqrt_ph() {
        let a = _mm512_set1_ph(4.0);
        let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
            2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Explicit-rounding sqrt: round-to-nearest with exceptions suppressed must
    // still give the exact result 2.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_sqrt_round_ph() {
        let a = _mm512_set1_ph(4.0);
        let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ph(2.0);
        assert_eq_m512h(r, e);
    }

    // Masked explicit-rounding sqrt: same lane selection behavior as the
    // non-round masked variant.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_sqrt_round_ph() {
        let a = _mm512_set1_ph(4.0);
        let src = _mm512_set1_ph(1.0);
        let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
            2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked explicit-rounding sqrt.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_sqrt_round_ph() {
        let a = _mm512_set1_ph(4.0);
        let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
            2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar sqrt: lane 0 becomes sqrt(b[0]) = 2.0; lanes 1..=7 come from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_sqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let r = _mm_sqrt_sh(a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked scalar sqrt: mask bit 0 selects between `src` (0) and the result (1).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_sqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_sqrt_sh(src, 0, a, b);
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_sqrt_sh(src, 1, a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked scalar sqrt: mask bit 0 clear zeroes lane 0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_sqrt_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let r = _mm_maskz_sqrt_sh(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_sqrt_sh(1, a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Explicit-rounding scalar sqrt.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_sqrt_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked explicit-rounding scalar sqrt: both mask states of bit 0 checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_sqrt_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked explicit-rounding scalar sqrt: both mask states of bit 0 checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_sqrt_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
        let r =
            _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }
22290
22291 #[simd_test(enable = "avx512fp16,avx512vl")]
22292 unsafe fn test_mm_max_ph() {
22293 let a = _mm_set1_ph(2.0);
22294 let b = _mm_set1_ph(1.0);
22295 let r = _mm_max_ph(a, b);
22296 let e = _mm_set1_ph(2.0);
22297 assert_eq_m128h(r, e);
22298 }
22299
    // Masked max (128-bit): selected lanes get max(2.0, 1.0) = 2.0, the rest keep src.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_max_ph() {
        let a = _mm_set1_ph(2.0);
        let b = _mm_set1_ph(1.0);
        let src = _mm_set1_ph(3.0);
        let r = _mm_mask_max_ph(src, 0b01010101, a, b);
        let e = _mm_set_ph(3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked max (128-bit): unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_max_ph() {
        let a = _mm_set1_ph(2.0);
        let b = _mm_set1_ph(1.0);
        let r = _mm_maskz_max_ph(0b01010101, a, b);
        let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
        assert_eq_m128h(r, e);
    }

    // Plain max (256-bit): max(2.0, 1.0) = 2.0 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_max_ph() {
        let a = _mm256_set1_ph(2.0);
        let b = _mm256_set1_ph(1.0);
        let r = _mm256_max_ph(a, b);
        let e = _mm256_set1_ph(2.0);
        assert_eq_m256h(r, e);
    }

    // Masked max (256-bit): alternating mask keeps src (3.0) in odd lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_max_ph() {
        let a = _mm256_set1_ph(2.0);
        let b = _mm256_set1_ph(1.0);
        let src = _mm256_set1_ph(3.0);
        let r = _mm256_mask_max_ph(src, 0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
        );
        assert_eq_m256h(r, e);
    }

    // Zero-masked max (256-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_max_ph() {
        let a = _mm256_set1_ph(2.0);
        let b = _mm256_set1_ph(1.0);
        let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
        );
        assert_eq_m256h(r, e);
    }

    // Plain max (512-bit): max(2.0, 1.0) = 2.0 in every lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_max_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_max_ph(a, b);
        let e = _mm512_set1_ph(2.0);
        assert_eq_m512h(r, e);
    }

    // Masked max (512-bit): alternating mask keeps src (3.0) in odd lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_max_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let src = _mm512_set1_ph(3.0);
        let r = _mm512_mask_max_ph(src, 0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
            2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked max (512-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_max_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
            2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Explicit-rounding max: rounding mode must not change an exact comparison.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_max_round_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ph(2.0);
        assert_eq_m512h(r, e);
    }

    // Masked explicit-rounding max: same lane selection as the non-round variant.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_max_round_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let src = _mm512_set1_ph(3.0);
        let r = _mm512_mask_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0,
            2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked explicit-rounding max.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_max_round_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0,
            2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar max: lane 0 becomes max(a[0], b[0]) = 2.0; lanes 1..=7 come from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_max_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r = _mm_max_sh(a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked scalar max: mask bit 0 selects between `src` (0) and the result (1).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_max_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_max_sh(src, 0, a, b);
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_max_sh(src, 1, a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked scalar max: mask bit 0 clear zeroes lane 0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_max_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r = _mm_maskz_max_sh(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_max_sh(1, a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Explicit-rounding scalar max.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_max_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked explicit-rounding scalar max: both mask states of bit 0 checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_max_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked explicit-rounding scalar max: both mask states of bit 0 checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_max_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r =
            _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_setr_ph(2.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }
22501
22502 #[simd_test(enable = "avx512fp16,avx512vl")]
22503 unsafe fn test_mm_min_ph() {
22504 let a = _mm_set1_ph(2.0);
22505 let b = _mm_set1_ph(1.0);
22506 let r = _mm_min_ph(a, b);
22507 let e = _mm_set1_ph(1.0);
22508 assert_eq_m128h(r, e);
22509 }
22510
    // Masked min (128-bit): selected lanes get min(2.0, 1.0) = 1.0, the rest keep src.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_min_ph() {
        let a = _mm_set1_ph(2.0);
        let b = _mm_set1_ph(1.0);
        let src = _mm_set1_ph(3.0);
        let r = _mm_mask_min_ph(src, 0b01010101, a, b);
        let e = _mm_set_ph(3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked min (128-bit): unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_min_ph() {
        let a = _mm_set1_ph(2.0);
        let b = _mm_set1_ph(1.0);
        let r = _mm_maskz_min_ph(0b01010101, a, b);
        let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
        assert_eq_m128h(r, e);
    }

    // Plain min (256-bit): min(2.0, 1.0) = 1.0 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_min_ph() {
        let a = _mm256_set1_ph(2.0);
        let b = _mm256_set1_ph(1.0);
        let r = _mm256_min_ph(a, b);
        let e = _mm256_set1_ph(1.0);
        assert_eq_m256h(r, e);
    }

    // Masked min (256-bit): alternating mask keeps src (3.0) in odd lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_min_ph() {
        let a = _mm256_set1_ph(2.0);
        let b = _mm256_set1_ph(1.0);
        let src = _mm256_set1_ph(3.0);
        let r = _mm256_mask_min_ph(src, 0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }

    // Zero-masked min (256-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_min_ph() {
        let a = _mm256_set1_ph(2.0);
        let b = _mm256_set1_ph(1.0);
        let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }

    // Plain min (512-bit): min(2.0, 1.0) = 1.0 in every lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_min_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_min_ph(a, b);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }

    // Masked min (512-bit): alternating mask keeps src (3.0) in odd lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_min_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let src = _mm512_set1_ph(3.0);
        let r = _mm512_mask_min_ph(src, 0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
            1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked min (512-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_min_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
            1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }

    // Explicit-rounding min: rounding mode must not change an exact comparison.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_min_round_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }

    // Masked explicit-rounding min: same lane selection as the non-round variant.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_min_round_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let src = _mm512_set1_ph(3.0);
        let r = _mm512_mask_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0,
            1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked explicit-rounding min.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_min_round_ph() {
        let a = _mm512_set1_ph(2.0);
        let b = _mm512_set1_ph(1.0);
        let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
            b,
        );
        let e = _mm512_set_ph(
            0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
            1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }

    // Scalar min: lane 0 becomes min(a[0], b[0]) = 1.0; lanes 1..=7 come from `a`.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_min_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r = _mm_min_sh(a, b);
        let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked scalar min: mask bit 0 selects between `src` (0) and the result (1).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_min_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_min_sh(src, 0, a, b);
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_min_sh(src, 1, a, b);
        let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked scalar min: mask bit 0 clear zeroes lane 0.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_min_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r = _mm_maskz_min_sh(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_min_sh(1, a, b);
        let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Explicit-rounding scalar min.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_min_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Masked explicit-rounding scalar min: both mask states of bit 0 checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_min_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
        let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(3.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r = _mm_mask_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked explicit-rounding scalar min: both mask states of bit 0 checked.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_min_round_sh() {
        let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
        let r =
            _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
        assert_eq_m128h(r, e);
    }
22712
22713 #[simd_test(enable = "avx512fp16,avx512vl")]
22714 unsafe fn test_mm_getexp_ph() {
22715 let a = _mm_set1_ph(3.0);
22716 let r = _mm_getexp_ph(a);
22717 let e = _mm_set1_ph(1.0);
22718 assert_eq_m128h(r, e);
22719 }
22720
    // Masked getexp (128-bit): selected lanes get getexp(3.0) = 1.0, the rest keep src.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_getexp_ph() {
        let a = _mm_set1_ph(3.0);
        let src = _mm_set1_ph(4.0);
        let r = _mm_mask_getexp_ph(src, 0b01010101, a);
        let e = _mm_set_ph(4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0);
        assert_eq_m128h(r, e);
    }

    // Zero-masked getexp (128-bit): unselected lanes are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_getexp_ph() {
        let a = _mm_set1_ph(3.0);
        let r = _mm_maskz_getexp_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
        assert_eq_m128h(r, e);
    }

    // Plain getexp (256-bit): getexp(3.0) = 1.0 in every lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_getexp_ph() {
        let a = _mm256_set1_ph(3.0);
        let r = _mm256_getexp_ph(a);
        let e = _mm256_set1_ph(1.0);
        assert_eq_m256h(r, e);
    }

    // Masked getexp (256-bit): alternating mask keeps src (4.0) in odd lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_getexp_ph() {
        let a = _mm256_set1_ph(3.0);
        let src = _mm256_set1_ph(4.0);
        let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }

    // Zero-masked getexp (256-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_getexp_ph() {
        let a = _mm256_set1_ph(3.0);
        let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
        );
        assert_eq_m256h(r, e);
    }

    // Plain getexp (512-bit): getexp(3.0) = 1.0 in every lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_getexp_ph() {
        let a = _mm512_set1_ph(3.0);
        let r = _mm512_getexp_ph(a);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }

    // Masked getexp (512-bit): alternating mask keeps src (4.0) in odd lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_getexp_ph() {
        let a = _mm512_set1_ph(3.0);
        let src = _mm512_set1_ph(4.0);
        let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
            1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }

    // Zero-masked getexp (512-bit): odd lanes are zeroed by the alternating mask.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_getexp_ph() {
        let a = _mm512_set1_ph(3.0);
        let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
            1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }

    // SAE getexp: suppressing exceptions must not change the extracted exponent.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_getexp_round_ph() {
        let a = _mm512_set1_ph(3.0);
        let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set1_ph(1.0);
        assert_eq_m512h(r, e);
    }

    // Masked SAE getexp: same lane selection as the non-round masked variant.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_getexp_round_ph() {
        let a = _mm512_set1_ph(3.0);
        let src = _mm512_set1_ph(4.0);
        let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0,
            1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0, 4.0, 1.0,
        );
        assert_eq_m512h(r, e);
    }
22821
22822 #[simd_test(enable = "avx512fp16")]
22823 unsafe fn test_mm512_maskz_getexp_round_ph() {
22824 let a = _mm512_set1_ph(3.0);
22825 let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
22826 0b01010101010101010101010101010101,
22827 a,
22828 );
22829 let e = _mm512_set_ph(
22830 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
22831 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
22832 );
22833 assert_eq_m512h(r, e);
22834 }
22835
22836 #[simd_test(enable = "avx512fp16,avx512vl")]
22837 unsafe fn test_mm_getexp_sh() {
22838 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22839 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22840 let r = _mm_getexp_sh(a, b);
22841 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22842 assert_eq_m128h(r, e);
22843 }
22844
22845 #[simd_test(enable = "avx512fp16,avx512vl")]
22846 unsafe fn test_mm_mask_getexp_sh() {
22847 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22848 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22849 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
22850 let r = _mm_mask_getexp_sh(src, 0, a, b);
22851 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22852 assert_eq_m128h(r, e);
22853 let r = _mm_mask_getexp_sh(src, 1, a, b);
22854 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22855 assert_eq_m128h(r, e);
22856 }
22857
22858 #[simd_test(enable = "avx512fp16,avx512vl")]
22859 unsafe fn test_mm_maskz_getexp_sh() {
22860 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22861 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22862 let r = _mm_maskz_getexp_sh(0, a, b);
22863 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22864 assert_eq_m128h(r, e);
22865 let r = _mm_maskz_getexp_sh(1, a, b);
22866 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22867 assert_eq_m128h(r, e);
22868 }
22869
22870 #[simd_test(enable = "avx512fp16,avx512vl")]
22871 unsafe fn test_mm_getexp_round_sh() {
22872 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22873 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22874 let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
22875 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22876 assert_eq_m128h(r, e);
22877 }
22878
22879 #[simd_test(enable = "avx512fp16,avx512vl")]
22880 unsafe fn test_mm_mask_getexp_round_sh() {
22881 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22882 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22883 let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
22884 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 0, a, b);
22885 let e = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22886 assert_eq_m128h(r, e);
22887 let r = _mm_mask_getexp_round_sh::<_MM_FROUND_NO_EXC>(src, 1, a, b);
22888 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22889 assert_eq_m128h(r, e);
22890 }
22891
22892 #[simd_test(enable = "avx512fp16,avx512vl")]
22893 unsafe fn test_mm_maskz_getexp_round_sh() {
22894 let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
22895 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
22896 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
22897 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
22898 assert_eq_m128h(r, e);
22899 let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(1, a, b);
22900 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
22901 assert_eq_m128h(r, e);
22902 }
22903
22904 #[simd_test(enable = "avx512fp16,avx512vl")]
22905 unsafe fn test_mm_getmant_ph() {
22906 let a = _mm_set1_ph(10.0);
22907 let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22908 let e = _mm_set1_ph(1.25);
22909 assert_eq_m128h(r, e);
22910 }
22911
22912 #[simd_test(enable = "avx512fp16,avx512vl")]
22913 unsafe fn test_mm_mask_getmant_ph() {
22914 let a = _mm_set1_ph(10.0);
22915 let src = _mm_set1_ph(20.0);
22916 let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
22917 let e = _mm_set_ph(20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25);
22918 assert_eq_m128h(r, e);
22919 }
22920
22921 #[simd_test(enable = "avx512fp16,avx512vl")]
22922 unsafe fn test_mm_maskz_getmant_ph() {
22923 let a = _mm_set1_ph(10.0);
22924 let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
22925 let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
22926 assert_eq_m128h(r, e);
22927 }
22928
22929 #[simd_test(enable = "avx512fp16,avx512vl")]
22930 unsafe fn test_mm256_getmant_ph() {
22931 let a = _mm256_set1_ph(10.0);
22932 let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22933 let e = _mm256_set1_ph(1.25);
22934 assert_eq_m256h(r, e);
22935 }
22936
22937 #[simd_test(enable = "avx512fp16,avx512vl")]
22938 unsafe fn test_mm256_mask_getmant_ph() {
22939 let a = _mm256_set1_ph(10.0);
22940 let src = _mm256_set1_ph(20.0);
22941 let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22942 src,
22943 0b0101010101010101,
22944 a,
22945 );
22946 let e = _mm256_set_ph(
22947 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22948 20.0, 1.25,
22949 );
22950 assert_eq_m256h(r, e);
22951 }
22952
22953 #[simd_test(enable = "avx512fp16,avx512vl")]
22954 unsafe fn test_mm256_maskz_getmant_ph() {
22955 let a = _mm256_set1_ph(10.0);
22956 let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22957 0b0101010101010101,
22958 a,
22959 );
22960 let e = _mm256_set_ph(
22961 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
22962 );
22963 assert_eq_m256h(r, e);
22964 }
22965
22966 #[simd_test(enable = "avx512fp16")]
22967 unsafe fn test_mm512_getmant_ph() {
22968 let a = _mm512_set1_ph(10.0);
22969 let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
22970 let e = _mm512_set1_ph(1.25);
22971 assert_eq_m512h(r, e);
22972 }
22973
22974 #[simd_test(enable = "avx512fp16")]
22975 unsafe fn test_mm512_mask_getmant_ph() {
22976 let a = _mm512_set1_ph(10.0);
22977 let src = _mm512_set1_ph(20.0);
22978 let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22979 src,
22980 0b01010101010101010101010101010101,
22981 a,
22982 );
22983 let e = _mm512_set_ph(
22984 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22985 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
22986 20.0, 1.25, 20.0, 1.25,
22987 );
22988 assert_eq_m512h(r, e);
22989 }
22990
22991 #[simd_test(enable = "avx512fp16")]
22992 unsafe fn test_mm512_maskz_getmant_ph() {
22993 let a = _mm512_set1_ph(10.0);
22994 let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
22995 0b01010101010101010101010101010101,
22996 a,
22997 );
22998 let e = _mm512_set_ph(
22999 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23000 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23001 );
23002 assert_eq_m512h(r, e);
23003 }
23004
23005 #[simd_test(enable = "avx512fp16")]
23006 unsafe fn test_mm512_getmant_round_ph() {
23007 let a = _mm512_set1_ph(10.0);
23008 let r =
23009 _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23010 a,
23011 );
23012 let e = _mm512_set1_ph(1.25);
23013 assert_eq_m512h(r, e);
23014 }
23015
23016 #[simd_test(enable = "avx512fp16")]
23017 unsafe fn test_mm512_mask_getmant_round_ph() {
23018 let a = _mm512_set1_ph(10.0);
23019 let src = _mm512_set1_ph(20.0);
23020 let r = _mm512_mask_getmant_round_ph::<
23021 _MM_MANT_NORM_P75_1P5,
23022 _MM_MANT_SIGN_NAN,
23023 _MM_FROUND_NO_EXC,
23024 >(src, 0b01010101010101010101010101010101, a);
23025 let e = _mm512_set_ph(
23026 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23027 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25, 20.0, 1.25,
23028 20.0, 1.25, 20.0, 1.25,
23029 );
23030 assert_eq_m512h(r, e);
23031 }
23032
23033 #[simd_test(enable = "avx512fp16")]
23034 unsafe fn test_mm512_maskz_getmant_round_ph() {
23035 let a = _mm512_set1_ph(10.0);
23036 let r = _mm512_maskz_getmant_round_ph::<
23037 _MM_MANT_NORM_P75_1P5,
23038 _MM_MANT_SIGN_NAN,
23039 _MM_FROUND_NO_EXC,
23040 >(0b01010101010101010101010101010101, a);
23041 let e = _mm512_set_ph(
23042 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23043 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25,
23044 );
23045 assert_eq_m512h(r, e);
23046 }
23047
23048 #[simd_test(enable = "avx512fp16,avx512vl")]
23049 unsafe fn test_mm_getmant_sh() {
23050 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23051 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23052 let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
23053 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23054 assert_eq_m128h(r, e);
23055 }
23056
23057 #[simd_test(enable = "avx512fp16,avx512vl")]
23058 unsafe fn test_mm_mask_getmant_sh() {
23059 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23060 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23061 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
23062 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0, a, b);
23063 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
23064 assert_eq_m128h(r, e);
23065 let r = _mm_mask_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 1, a, b);
23066 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23067 assert_eq_m128h(r, e);
23068 }
23069
23070 #[simd_test(enable = "avx512fp16,avx512vl")]
23071 unsafe fn test_mm_maskz_getmant_sh() {
23072 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23073 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23074 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
23075 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23076 assert_eq_m128h(r, e);
23077 let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(1, a, b);
23078 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23079 assert_eq_m128h(r, e);
23080 }
23081
23082 #[simd_test(enable = "avx512fp16,avx512vl")]
23083 unsafe fn test_mm_getmant_round_sh() {
23084 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23085 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23086 let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
23087 a, b,
23088 );
23089 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23090 assert_eq_m128h(r, e);
23091 }
23092
23093 #[simd_test(enable = "avx512fp16,avx512vl")]
23094 unsafe fn test_mm_mask_getmant_round_sh() {
23095 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23096 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23097 let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
23098 let r = _mm_mask_getmant_round_sh::<
23099 _MM_MANT_NORM_P75_1P5,
23100 _MM_MANT_SIGN_NAN,
23101 _MM_FROUND_NO_EXC,
23102 >(src, 0, a, b);
23103 let e = _mm_setr_ph(20.0, 10., 11., 12., 13., 14., 15., 16.);
23104 assert_eq_m128h(r, e);
23105 let r = _mm_mask_getmant_round_sh::<
23106 _MM_MANT_NORM_P75_1P5,
23107 _MM_MANT_SIGN_NAN,
23108 _MM_FROUND_NO_EXC,
23109 >(src, 1, a, b);
23110 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23111 assert_eq_m128h(r, e);
23112 }
23113
23114 #[simd_test(enable = "avx512fp16,avx512vl")]
23115 unsafe fn test_mm_maskz_getmant_round_sh() {
23116 let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
23117 let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
23118 let r = _mm_maskz_getmant_round_sh::<
23119 _MM_MANT_NORM_P75_1P5,
23120 _MM_MANT_SIGN_NAN,
23121 _MM_FROUND_NO_EXC,
23122 >(0, a, b);
23123 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23124 assert_eq_m128h(r, e);
23125 let r = _mm_maskz_getmant_round_sh::<
23126 _MM_MANT_NORM_P75_1P5,
23127 _MM_MANT_SIGN_NAN,
23128 _MM_FROUND_NO_EXC,
23129 >(1, a, b);
23130 let e = _mm_setr_ph(1.25, 10., 11., 12., 13., 14., 15., 16.);
23131 assert_eq_m128h(r, e);
23132 }
23133
23134 #[simd_test(enable = "avx512fp16,avx512vl")]
23135 unsafe fn test_mm_roundscale_ph() {
23136 let a = _mm_set1_ph(1.1);
23137 let r = _mm_roundscale_ph::<0>(a);
23138 let e = _mm_set1_ph(1.0);
23139 assert_eq_m128h(r, e);
23140 }
23141
23142 #[simd_test(enable = "avx512fp16,avx512vl")]
23143 unsafe fn test_mm_mask_roundscale_ph() {
23144 let a = _mm_set1_ph(1.1);
23145 let src = _mm_set1_ph(2.0);
23146 let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
23147 let e = _mm_set_ph(2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0);
23148 assert_eq_m128h(r, e);
23149 }
23150
23151 #[simd_test(enable = "avx512fp16,avx512vl")]
23152 unsafe fn test_mm_maskz_roundscale_ph() {
23153 let a = _mm_set1_ph(1.1);
23154 let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
23155 let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
23156 assert_eq_m128h(r, e);
23157 }
23158
23159 #[simd_test(enable = "avx512fp16,avx512vl")]
23160 unsafe fn test_mm256_roundscale_ph() {
23161 let a = _mm256_set1_ph(1.1);
23162 let r = _mm256_roundscale_ph::<0>(a);
23163 let e = _mm256_set1_ph(1.0);
23164 assert_eq_m256h(r, e);
23165 }
23166
23167 #[simd_test(enable = "avx512fp16,avx512vl")]
23168 unsafe fn test_mm256_mask_roundscale_ph() {
23169 let a = _mm256_set1_ph(1.1);
23170 let src = _mm256_set1_ph(2.0);
23171 let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
23172 let e = _mm256_set_ph(
23173 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23174 );
23175 assert_eq_m256h(r, e);
23176 }
23177
23178 #[simd_test(enable = "avx512fp16,avx512vl")]
23179 unsafe fn test_mm256_maskz_roundscale_ph() {
23180 let a = _mm256_set1_ph(1.1);
23181 let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
23182 let e = _mm256_set_ph(
23183 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23184 );
23185 assert_eq_m256h(r, e);
23186 }
23187
23188 #[simd_test(enable = "avx512fp16")]
23189 unsafe fn test_mm512_roundscale_ph() {
23190 let a = _mm512_set1_ph(1.1);
23191 let r = _mm512_roundscale_ph::<0>(a);
23192 let e = _mm512_set1_ph(1.0);
23193 assert_eq_m512h(r, e);
23194 }
23195
23196 #[simd_test(enable = "avx512fp16")]
23197 unsafe fn test_mm512_mask_roundscale_ph() {
23198 let a = _mm512_set1_ph(1.1);
23199 let src = _mm512_set1_ph(2.0);
23200 let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
23201 let e = _mm512_set_ph(
23202 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23203 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23204 );
23205 assert_eq_m512h(r, e);
23206 }
23207
23208 #[simd_test(enable = "avx512fp16")]
23209 unsafe fn test_mm512_maskz_roundscale_ph() {
23210 let a = _mm512_set1_ph(1.1);
23211 let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
23212 let e = _mm512_set_ph(
23213 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23214 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23215 );
23216 assert_eq_m512h(r, e);
23217 }
23218
23219 #[simd_test(enable = "avx512fp16")]
23220 unsafe fn test_mm512_roundscale_round_ph() {
23221 let a = _mm512_set1_ph(1.1);
23222 let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
23223 let e = _mm512_set1_ph(1.0);
23224 assert_eq_m512h(r, e);
23225 }
23226
23227 #[simd_test(enable = "avx512fp16")]
23228 unsafe fn test_mm512_mask_roundscale_round_ph() {
23229 let a = _mm512_set1_ph(1.1);
23230 let src = _mm512_set1_ph(2.0);
23231 let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23232 src,
23233 0b01010101010101010101010101010101,
23234 a,
23235 );
23236 let e = _mm512_set_ph(
23237 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0,
23238 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0,
23239 );
23240 assert_eq_m512h(r, e);
23241 }
23242
23243 #[simd_test(enable = "avx512fp16")]
23244 unsafe fn test_mm512_maskz_roundscale_round_ph() {
23245 let a = _mm512_set1_ph(1.1);
23246 let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
23247 0b01010101010101010101010101010101,
23248 a,
23249 );
23250 let e = _mm512_set_ph(
23251 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
23252 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0,
23253 );
23254 assert_eq_m512h(r, e);
23255 }
23256
23257 #[simd_test(enable = "avx512fp16,avx512vl")]
23258 unsafe fn test_mm_roundscale_sh() {
23259 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23260 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23261 let r = _mm_roundscale_sh::<0>(a, b);
23262 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23263 assert_eq_m128h(r, e);
23264 }
23265
23266 #[simd_test(enable = "avx512fp16,avx512vl")]
23267 unsafe fn test_mm_mask_roundscale_sh() {
23268 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23269 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23270 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23271 let r = _mm_mask_roundscale_sh::<0>(src, 0, a, b);
23272 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23273 assert_eq_m128h(r, e);
23274 let r = _mm_mask_roundscale_sh::<0>(src, 1, a, b);
23275 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23276 assert_eq_m128h(r, e);
23277 }
23278
23279 #[simd_test(enable = "avx512fp16,avx512vl")]
23280 unsafe fn test_mm_maskz_roundscale_sh() {
23281 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23282 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23283 let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
23284 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23285 assert_eq_m128h(r, e);
23286 let r = _mm_maskz_roundscale_sh::<0>(1, a, b);
23287 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23288 assert_eq_m128h(r, e);
23289 }
23290
23291 #[simd_test(enable = "avx512fp16,avx512vl")]
23292 unsafe fn test_mm_roundscale_round_sh() {
23293 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23294 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23295 let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
23296 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23297 assert_eq_m128h(r, e);
23298 }
23299
23300 #[simd_test(enable = "avx512fp16,avx512vl")]
23301 unsafe fn test_mm_mask_roundscale_round_sh() {
23302 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23303 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23304 let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
23305 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 0, a, b);
23306 let e = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23307 assert_eq_m128h(r, e);
23308 let r = _mm_mask_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(src, 1, a, b);
23309 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23310 assert_eq_m128h(r, e);
23311 }
23312
23313 #[simd_test(enable = "avx512fp16,avx512vl")]
23314 unsafe fn test_mm_maskz_roundscale_round_sh() {
23315 let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23316 let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
23317 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
23318 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23319 assert_eq_m128h(r, e);
23320 let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(1, a, b);
23321 let e = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23322 assert_eq_m128h(r, e);
23323 }
23324
23325 #[simd_test(enable = "avx512fp16,avx512vl")]
23326 unsafe fn test_mm_scalef_ph() {
23327 let a = _mm_set1_ph(1.);
23328 let b = _mm_set1_ph(3.);
23329 let r = _mm_scalef_ph(a, b);
23330 let e = _mm_set1_ph(8.0);
23331 assert_eq_m128h(r, e);
23332 }
23333
23334 #[simd_test(enable = "avx512fp16,avx512vl")]
23335 unsafe fn test_mm_mask_scalef_ph() {
23336 let a = _mm_set1_ph(1.);
23337 let b = _mm_set1_ph(3.);
23338 let src = _mm_set1_ph(2.);
23339 let r = _mm_mask_scalef_ph(src, 0b01010101, a, b);
23340 let e = _mm_set_ph(2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0);
23341 assert_eq_m128h(r, e);
23342 }
23343
23344 #[simd_test(enable = "avx512fp16,avx512vl")]
23345 unsafe fn test_mm_maskz_scalef_ph() {
23346 let a = _mm_set1_ph(1.);
23347 let b = _mm_set1_ph(3.);
23348 let r = _mm_maskz_scalef_ph(0b01010101, a, b);
23349 let e = _mm_set_ph(0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0);
23350 assert_eq_m128h(r, e);
23351 }
23352
23353 #[simd_test(enable = "avx512fp16,avx512vl")]
23354 unsafe fn test_mm256_scalef_ph() {
23355 let a = _mm256_set1_ph(1.);
23356 let b = _mm256_set1_ph(3.);
23357 let r = _mm256_scalef_ph(a, b);
23358 let e = _mm256_set1_ph(8.0);
23359 assert_eq_m256h(r, e);
23360 }
23361
23362 #[simd_test(enable = "avx512fp16,avx512vl")]
23363 unsafe fn test_mm256_mask_scalef_ph() {
23364 let a = _mm256_set1_ph(1.);
23365 let b = _mm256_set1_ph(3.);
23366 let src = _mm256_set1_ph(2.);
23367 let r = _mm256_mask_scalef_ph(src, 0b0101010101010101, a, b);
23368 let e = _mm256_set_ph(
23369 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23370 );
23371 assert_eq_m256h(r, e);
23372 }
23373
23374 #[simd_test(enable = "avx512fp16,avx512vl")]
23375 unsafe fn test_mm256_maskz_scalef_ph() {
23376 let a = _mm256_set1_ph(1.);
23377 let b = _mm256_set1_ph(3.);
23378 let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
23379 let e = _mm256_set_ph(
23380 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23381 );
23382 assert_eq_m256h(r, e);
23383 }
23384
23385 #[simd_test(enable = "avx512fp16")]
23386 unsafe fn test_mm512_scalef_ph() {
23387 let a = _mm512_set1_ph(1.);
23388 let b = _mm512_set1_ph(3.);
23389 let r = _mm512_scalef_ph(a, b);
23390 let e = _mm512_set1_ph(8.0);
23391 assert_eq_m512h(r, e);
23392 }
23393
23394 #[simd_test(enable = "avx512fp16")]
23395 unsafe fn test_mm512_mask_scalef_ph() {
23396 let a = _mm512_set1_ph(1.);
23397 let b = _mm512_set1_ph(3.);
23398 let src = _mm512_set1_ph(2.);
23399 let r = _mm512_mask_scalef_ph(src, 0b01010101010101010101010101010101, a, b);
23400 let e = _mm512_set_ph(
23401 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23402 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23403 );
23404 assert_eq_m512h(r, e);
23405 }
23406
23407 #[simd_test(enable = "avx512fp16")]
23408 unsafe fn test_mm512_maskz_scalef_ph() {
23409 let a = _mm512_set1_ph(1.);
23410 let b = _mm512_set1_ph(3.);
23411 let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
23412 let e = _mm512_set_ph(
23413 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23414 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23415 );
23416 assert_eq_m512h(r, e);
23417 }
23418
23419 #[simd_test(enable = "avx512fp16")]
23420 unsafe fn test_mm512_scalef_round_ph() {
23421 let a = _mm512_set1_ph(1.);
23422 let b = _mm512_set1_ph(3.);
23423 let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23424 let e = _mm512_set1_ph(8.0);
23425 assert_eq_m512h(r, e);
23426 }
23427
23428 #[simd_test(enable = "avx512fp16")]
23429 unsafe fn test_mm512_mask_scalef_round_ph() {
23430 let a = _mm512_set1_ph(1.);
23431 let b = _mm512_set1_ph(3.);
23432 let src = _mm512_set1_ph(2.);
23433 let r = _mm512_mask_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23434 src,
23435 0b01010101010101010101010101010101,
23436 a,
23437 b,
23438 );
23439 let e = _mm512_set_ph(
23440 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0,
23441 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0, 2.0, 8.0,
23442 );
23443 assert_eq_m512h(r, e);
23444 }
23445
23446 #[simd_test(enable = "avx512fp16")]
23447 unsafe fn test_mm512_maskz_scalef_round_ph() {
23448 let a = _mm512_set1_ph(1.);
23449 let b = _mm512_set1_ph(3.);
23450 let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23451 0b01010101010101010101010101010101,
23452 a,
23453 b,
23454 );
23455 let e = _mm512_set_ph(
23456 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0,
23457 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0, 0.0, 8.0,
23458 );
23459 assert_eq_m512h(r, e);
23460 }
23461
23462 #[simd_test(enable = "avx512fp16,avx512vl")]
23463 unsafe fn test_mm_scalef_sh() {
23464 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23465 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23466 let r = _mm_scalef_sh(a, b);
23467 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23468 assert_eq_m128h(r, e);
23469 }
23470
23471 #[simd_test(enable = "avx512fp16,avx512vl")]
23472 unsafe fn test_mm_mask_scalef_sh() {
23473 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23474 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23475 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23476 let r = _mm_mask_scalef_sh(src, 0, a, b);
23477 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23478 assert_eq_m128h(r, e);
23479 let r = _mm_mask_scalef_sh(src, 1, a, b);
23480 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23481 assert_eq_m128h(r, e);
23482 }
23483
23484 #[simd_test(enable = "avx512fp16,avx512vl")]
23485 unsafe fn test_mm_maskz_scalef_sh() {
23486 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23487 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23488 let r = _mm_maskz_scalef_sh(0, a, b);
23489 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23490 assert_eq_m128h(r, e);
23491 let r = _mm_maskz_scalef_sh(1, a, b);
23492 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23493 assert_eq_m128h(r, e);
23494 }
23495
23496 #[simd_test(enable = "avx512fp16,avx512vl")]
23497 unsafe fn test_mm_scalef_round_sh() {
23498 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23499 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23500 let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
23501 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23502 assert_eq_m128h(r, e);
23503 }
23504
23505 #[simd_test(enable = "avx512fp16,avx512vl")]
23506 unsafe fn test_mm_mask_scalef_round_sh() {
23507 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23508 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23509 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23510 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23511 src, 0, a, b,
23512 );
23513 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23514 assert_eq_m128h(r, e);
23515 let r = _mm_mask_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
23516 src, 1, a, b,
23517 );
23518 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23519 assert_eq_m128h(r, e);
23520 }
23521
23522 #[simd_test(enable = "avx512fp16,avx512vl")]
23523 unsafe fn test_mm_maskz_scalef_round_sh() {
23524 let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
23525 let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
23526 let r =
23527 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
23528 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23529 assert_eq_m128h(r, e);
23530 let r =
23531 _mm_maskz_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
23532 let e = _mm_setr_ph(8.0, 10., 11., 12., 13., 14., 15., 16.);
23533 assert_eq_m128h(r, e);
23534 }
23535
23536 #[simd_test(enable = "avx512fp16,avx512vl")]
23537 unsafe fn test_mm_reduce_ph() {
23538 let a = _mm_set1_ph(1.25);
23539 let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23540 let e = _mm_set1_ph(0.25);
23541 assert_eq_m128h(r, e);
23542 }
23543
23544 #[simd_test(enable = "avx512fp16,avx512vl")]
23545 unsafe fn test_mm_mask_reduce_ph() {
23546 let a = _mm_set1_ph(1.25);
23547 let src = _mm_set1_ph(2.0);
23548 let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
23549 let e = _mm_set_ph(2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25);
23550 assert_eq_m128h(r, e);
23551 }
23552
23553 #[simd_test(enable = "avx512fp16,avx512vl")]
23554 unsafe fn test_mm_maskz_reduce_ph() {
23555 let a = _mm_set1_ph(1.25);
23556 let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
23557 let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
23558 assert_eq_m128h(r, e);
23559 }
23560
23561 #[simd_test(enable = "avx512fp16,avx512vl")]
23562 unsafe fn test_mm256_reduce_ph() {
23563 let a = _mm256_set1_ph(1.25);
23564 let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23565 let e = _mm256_set1_ph(0.25);
23566 assert_eq_m256h(r, e);
23567 }
23568
23569 #[simd_test(enable = "avx512fp16,avx512vl")]
23570 unsafe fn test_mm256_mask_reduce_ph() {
23571 let a = _mm256_set1_ph(1.25);
23572 let src = _mm256_set1_ph(2.0);
23573 let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
23574 let e = _mm256_set_ph(
23575 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
23576 );
23577 assert_eq_m256h(r, e);
23578 }
23579
    // Zero-masked 256-bit reduce: clear mask bits produce 0.0 lanes.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_reduce_ph() {
        let a = _mm256_set1_ph(1.25);
        let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
        );
        assert_eq_m256h(r, e);
    }
23589
23590 #[simd_test(enable = "avx512fp16")]
23591 unsafe fn test_mm512_reduce_ph() {
23592 let a = _mm512_set1_ph(1.25);
23593 let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
23594 let e = _mm512_set1_ph(0.25);
23595 assert_eq_m512h(r, e);
23596 }
23597
    // Masked 512-bit reduce: set mask bits get 0.25, clear bits keep src's 2.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_reduce_ph() {
        let a = _mm512_set1_ph(1.25);
        let src = _mm512_set1_ph(2.0);
        let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
            2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
        );
        assert_eq_m512h(r, e);
    }
23613
    // Zero-masked 512-bit reduce: clear mask bits produce 0.0 lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_reduce_ph() {
        let a = _mm512_set1_ph(1.25);
        let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
            0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
        );
        assert_eq_m512h(r, e);
    }
23627
23628 #[simd_test(enable = "avx512fp16")]
23629 unsafe fn test_mm512_reduce_round_ph() {
23630 let a = _mm512_set1_ph(1.25);
23631 let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
23632 let e = _mm512_set1_ph(0.25);
23633 assert_eq_m512h(r, e);
23634 }
23635
    // Masked reduce with SAE: set mask bits get 0.25, clear bits keep src's 2.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_reduce_round_ph() {
        let a = _mm512_set1_ph(1.25);
        let src = _mm512_set1_ph(2.0);
        let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
            2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25, 2.0, 0.25,
        );
        assert_eq_m512h(r, e);
    }
23651
    // Zero-masked reduce with SAE: clear mask bits produce 0.0 lanes.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_reduce_round_ph() {
        let a = _mm512_set1_ph(1.25);
        let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
            0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25,
        );
        assert_eq_m512h(r, e);
    }
23665
23666 #[simd_test(enable = "avx512fp16,avx512vl")]
23667 unsafe fn test_mm_reduce_sh() {
23668 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23669 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23670 let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
23671 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23672 assert_eq_m128h(r, e);
23673 }
23674
23675 #[simd_test(enable = "avx512fp16,avx512vl")]
23676 unsafe fn test_mm_mask_reduce_sh() {
23677 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23678 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23679 let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
23680 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0, a, b);
23681 let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
23682 assert_eq_m128h(r, e);
23683 let r = _mm_mask_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 1, a, b);
23684 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23685 assert_eq_m128h(r, e);
23686 }
23687
23688 #[simd_test(enable = "avx512fp16,avx512vl")]
23689 unsafe fn test_mm_maskz_reduce_sh() {
23690 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23691 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23692 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
23693 let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
23694 assert_eq_m128h(r, e);
23695 let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(1, a, b);
23696 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23697 assert_eq_m128h(r, e);
23698 }
23699
23700 #[simd_test(enable = "avx512fp16,avx512vl")]
23701 unsafe fn test_mm_reduce_round_sh() {
23702 let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
23703 let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
23704 let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
23705 let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
23706 assert_eq_m128h(r, e);
23707 }
23708
    // Scalar masked reduce with SAE: mask bit 0 clear -> src lane 0 (2.0); set -> reduced
    // b lane 0 (0.25). Upper lanes always come from a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_reduce_round_sh() {
        let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
        let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
        let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r = _mm_mask_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src, 1, a, b,
        );
        let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
23725
    // Scalar zero-masked reduce with SAE: mask bit 0 clear zeroes lane 0; set yields 0.25.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_reduce_round_sh() {
        let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
        let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
        let r =
            _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0, a, b);
        let e = _mm_setr_ph(0.0, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(1, a, b);
        let e = _mm_setr_ph(0.25, 10., 11., 12., 13., 14., 15., 16.);
        assert_eq_m128h(r, e);
    }
23739
23740 #[simd_test(enable = "avx512fp16,avx512vl")]
23741 unsafe fn test_mm_reduce_add_ph() {
23742 let a = _mm_set1_ph(2.0);
23743 let r = _mm_reduce_add_ph(a);
23744 assert_eq!(r, 16.0);
23745 }
23746
23747 #[simd_test(enable = "avx512fp16,avx512vl")]
23748 unsafe fn test_mm256_reduce_add_ph() {
23749 let a = _mm256_set1_ph(2.0);
23750 let r = _mm256_reduce_add_ph(a);
23751 assert_eq!(r, 32.0);
23752 }
23753
23754 #[simd_test(enable = "avx512fp16")]
23755 unsafe fn test_mm512_reduce_add_ph() {
23756 let a = _mm512_set1_ph(2.0);
23757 let r = _mm512_reduce_add_ph(a);
23758 assert_eq!(r, 64.0);
23759 }
23760
23761 #[simd_test(enable = "avx512fp16,avx512vl")]
23762 unsafe fn test_mm_reduce_mul_ph() {
23763 let a = _mm_set1_ph(2.0);
23764 let r = _mm_reduce_mul_ph(a);
23765 assert_eq!(r, 256.0);
23766 }
23767
    // NOTE(review): 2.0^16 = 65536 is above f16::MAX (65504), so both the computed
    // product and the 65536.0 expected literal round to +infinity in f16 — the assert
    // effectively compares inf == inf. Confirm this overflow is intentional.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_reduce_mul_ph() {
        let a = _mm256_set1_ph(2.0);
        let r = _mm256_reduce_mul_ph(a);
        assert_eq!(r, 65536.0);
    }
23774
    // NOTE(review): the exact product 2.0^32 and the 16777216.0 (2^24) expected literal
    // are both far above f16::MAX (65504); in f16 both end up as +infinity, so the assert
    // compares inf == inf rather than the literal value. Confirm this is intentional.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_reduce_mul_ph() {
        let a = _mm512_set1_ph(2.0);
        let r = _mm512_reduce_mul_ph(a);
        assert_eq!(r, 16777216.0);
    }
23781
23782 #[simd_test(enable = "avx512fp16,avx512vl")]
23783 unsafe fn test_mm_reduce_max_ph() {
23784 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23785 let r = _mm_reduce_max_ph(a);
23786 assert_eq!(r, 8.0);
23787 }
23788
23789 #[simd_test(enable = "avx512fp16,avx512vl")]
23790 unsafe fn test_mm256_reduce_max_ph() {
23791 let a = _mm256_set_ph(
23792 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23793 );
23794 let r = _mm256_reduce_max_ph(a);
23795 assert_eq!(r, 16.0);
23796 }
23797
    // Horizontal max over the lanes 1.0..=32.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_reduce_max_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_reduce_max_ph(a);
        assert_eq!(r, 32.0);
    }
23808
23809 #[simd_test(enable = "avx512fp16,avx512vl")]
23810 unsafe fn test_mm_reduce_min_ph() {
23811 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
23812 let r = _mm_reduce_min_ph(a);
23813 assert_eq!(r, 1.0);
23814 }
23815
23816 #[simd_test(enable = "avx512fp16,avx512vl")]
23817 unsafe fn test_mm256_reduce_min_ph() {
23818 let a = _mm256_set_ph(
23819 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
23820 );
23821 let r = _mm256_reduce_min_ph(a);
23822 assert_eq!(r, 1.0);
23823 }
23824
    // Horizontal min over the lanes 1.0..=32.0.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_reduce_min_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_reduce_min_ph(a);
        assert_eq!(r, 1.0);
    }
23835
    // imm8 = 0x18 classifies +infinity (0x08) | -infinity (0x10); only the two infinity
    // lanes (elements 5 and 6, counting from the low lane) set their mask bits.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_fpclass_ph_mask() {
        let a = _mm_set_ph(
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
        );
        let r = _mm_fpclass_ph_mask::<0x18>(a); // infinities
        assert_eq!(r, 0b01100000);
    }
23851
    // Masked infinity classification: 0b01100000 (the two infinity lanes) ANDed with the
    // 0b01010101 write mask leaves only bit 6.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_fpclass_ph_mask() {
        let a = _mm_set_ph(
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
        );
        let r = _mm_mask_fpclass_ph_mask::<0x18>(0b01010101, a);
        assert_eq!(r, 0b01000000);
    }
23867
    // 256-bit infinity classification (imm8 0x18 = +inf | -inf); the 8-lane pattern from
    // the 128-bit test is repeated twice, so the mask repeats too.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_fpclass_ph_mask() {
        let a = _mm256_set_ph(
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
        );
        let r = _mm256_fpclass_ph_mask::<0x18>(a); // infinities
        assert_eq!(r, 0b0110000001100000);
    }
23891
    // Masked 256-bit infinity classification: the repeated 0b01100000 result ANDed with
    // the alternating write mask leaves bit 6 of each 8-lane group.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_fpclass_ph_mask() {
        let a = _mm256_set_ph(
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
        );
        let r = _mm256_mask_fpclass_ph_mask::<0x18>(0b0101010101010101, a);
        assert_eq!(r, 0b0100000001000000);
    }
23915
    // 512-bit infinity classification (imm8 0x18 = +inf | -inf); the 8-lane pattern is
    // repeated four times, so the mask repeats four times as well.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_fpclass_ph_mask() {
        let a = _mm512_set_ph(
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
        );
        let r = _mm512_fpclass_ph_mask::<0x18>(a); // infinities
        assert_eq!(r, 0b01100000011000000110000001100000);
    }
23955
    // Masked 512-bit infinity classification: the repeated 0b01100000 result ANDed with
    // the alternating write mask leaves bit 6 of each 8-lane group.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_fpclass_ph_mask() {
        let a = _mm512_set_ph(
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
            1.,
            f16::INFINITY,
            f16::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f16::NAN,
            5.9e-8, // Denormal
        );
        let r = _mm512_mask_fpclass_ph_mask::<0x18>(0b01010101010101010101010101010101, a);
        assert_eq!(r, 0b01000000010000000100000001000000);
    }
23995
23996 #[simd_test(enable = "avx512fp16")]
23997 unsafe fn test_mm_fpclass_sh_mask() {
23998 let a = _mm_set_sh(f16::INFINITY);
23999 let r = _mm_fpclass_sh_mask::<0x18>(a);
24000 assert_eq!(r, 1);
24001 }
24002
24003 #[simd_test(enable = "avx512fp16")]
24004 unsafe fn test_mm_mask_fpclass_sh_mask() {
24005 let a = _mm_set_sh(f16::INFINITY);
24006 let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
24007 assert_eq!(r, 0);
24008 let r = _mm_mask_fpclass_sh_mask::<0x18>(1, a);
24009 assert_eq!(r, 1);
24010 }
24011
24012 #[simd_test(enable = "avx512fp16,avx512vl")]
24013 unsafe fn test_mm_mask_blend_ph() {
24014 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24015 let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
24016 let r = _mm_mask_blend_ph(0b01010101, a, b);
24017 let e = _mm_set_ph(1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0);
24018 assert_eq_m128h(r, e);
24019 }
24020
    // 256-bit blend: set mask bits select the negated lane from b, clear bits keep a.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_blend_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_set_ph(
            -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
            -14.0, -15.0, -16.0,
        );
        let r = _mm256_mask_blend_ph(0b0101010101010101, a, b);
        let e = _mm256_set_ph(
            1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
            -16.0,
        );
        assert_eq_m256h(r, e);
    }
24037
    // 512-bit blend: set mask bits select the negated lane from b, clear bits keep a.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_blend_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_set_ph(
            -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0,
            -14.0, -15.0, -16.0, -17.0, -18.0, -19.0, -20.0, -21.0, -22.0, -23.0, -24.0, -25.0,
            -26.0, -27.0, -28.0, -29.0, -30.0, -31.0, -32.0,
        );
        let r = _mm512_mask_blend_ph(0b01010101010101010101010101010101, a, b);
        let e = _mm512_set_ph(
            1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0, -14.0, 15.0,
            -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0, 27.0, -28.0,
            29.0, -30.0, 31.0, -32.0,
        );
        assert_eq_m512h(r, e);
    }
24058
24059 #[simd_test(enable = "avx512fp16,avx512vl")]
24060 unsafe fn test_mm_permutex2var_ph() {
24061 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24062 let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
24063 let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
24064 let r = _mm_permutex2var_ph(a, idx, b);
24065 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0);
24066 assert_eq_m128h(r, e);
24067 }
24068
    // Two-source permute over 32 lanes (a = 0-15, b = 16-31); the even indices select
    // every other value 1, 3, ..., 31.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_permutex2var_ph() {
        let a = _mm256_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let b = _mm256_setr_ph(
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let idx = _mm256_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
        let r = _mm256_permutex2var_ph(a, idx, b);
        let e = _mm256_setr_ph(
            1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
            31.0,
        );
        assert_eq_m256h(r, e);
    }
24086
    // Two-source permute over 64 lanes (a = 0-31, b = 32-63). The index vector is built
    // with set_epi16 (high-to-low), so lane 0 gets index 0, lane 1 index 2, ..., picking
    // every other value 1, 3, ..., 63.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_permutex2var_ph() {
        let a = _mm512_setr_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let b = _mm512_setr_ph(
            33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0,
            47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
            61.0, 62.0, 63.0, 64.0,
        );
        let idx = _mm512_set_epi16(
            62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20,
            18, 16, 14, 12, 10, 8, 6, 4, 2, 0,
        );
        let r = _mm512_permutex2var_ph(a, idx, b);
        let e = _mm512_setr_ph(
            1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
            31.0, 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0, 49.0, 51.0, 53.0, 55.0, 57.0,
            59.0, 61.0, 63.0,
        );
        assert_eq_m512h(r, e);
    }
24111
24112 #[simd_test(enable = "avx512fp16,avx512vl")]
24113 unsafe fn test_mm_permutexvar_ph() {
24114 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24115 let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
24116 let r = _mm_permutexvar_ph(idx, a);
24117 let e = _mm_setr_ph(1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0);
24118 assert_eq_m128h(r, e);
24119 }
24120
    // 256-bit single-source permute: odd source lanes first, then even ones.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_permutexvar_ph() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let idx = _mm256_set_epi16(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
        let r = _mm256_permutexvar_ph(idx, a);
        let e = _mm256_setr_ph(
            1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }
24133
    // 512-bit single-source permute: odd source lanes first, then even ones.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_permutexvar_ph() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let idx = _mm512_set_epi16(
            0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 1, 3, 5, 7, 9, 11, 13, 15,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        let r = _mm512_permutexvar_ph(idx, a);
        let e = _mm512_setr_ph(
            1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23.0, 25.0, 27.0, 29.0,
            31.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0,
            30.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }
24153
24154 #[simd_test(enable = "avx512fp16,avx512vl")]
24155 unsafe fn test_mm_cvtepi16_ph() {
24156 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24157 let r = _mm_cvtepi16_ph(a);
24158 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24159 assert_eq_m128h(r, e);
24160 }
24161
24162 #[simd_test(enable = "avx512fp16,avx512vl")]
24163 unsafe fn test_mm_mask_cvtepi16_ph() {
24164 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24165 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24166 let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a);
24167 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24168 assert_eq_m128h(r, e);
24169 }
24170
24171 #[simd_test(enable = "avx512fp16,avx512vl")]
24172 unsafe fn test_mm_maskz_cvtepi16_ph() {
24173 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24174 let r = _mm_maskz_cvtepi16_ph(0b01010101, a);
24175 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24176 assert_eq_m128h(r, e);
24177 }
24178
    // 256-bit lane-wise signed 16-bit integer -> f16 conversion.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtepi16_ph() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_cvtepi16_ph(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }
24188
    // Masked conversion: lanes with a clear mask bit keep the corresponding src lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_ph() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm256_mask_cvtepi16_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
        );
        assert_eq_m256h(r, e);
    }
24201
    // Zero-masked conversion: lanes with a clear mask bit are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_ph() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
        );
        assert_eq_m256h(r, e);
    }
24211
    // 512-bit lane-wise signed 16-bit integer -> f16 conversion.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtepi16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_cvtepi16_ph(a);
        let e = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }
24226
    // Masked 512-bit conversion: lanes with a clear mask bit keep the corresponding src lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtepi16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let src = _mm512_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
            27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        );
        let r = _mm512_mask_cvtepi16_ph(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
            28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
        );
        assert_eq_m512h(r, e);
    }
24244
    // Zero-masked 512-bit conversion: lanes with a clear mask bit are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtepi16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_cvtepi16_ph(0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
            0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
        );
        assert_eq_m512h(r, e);
    }
24258
    // Signed 16-bit -> f16 conversion with an explicit rounding mode; small integers are
    // exactly representable, so rounding has no visible effect here.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundepi16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }
24273
    // Masked rounding conversion: lanes with a clear mask bit keep the corresponding src lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundepi16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let src = _mm512_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
            27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        );
        let r = _mm512_mask_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
            28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
        );
        assert_eq_m512h(r, e);
    }
24295
    // Zero-masked rounding conversion: lanes with a clear mask bit are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundepi16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_cvt_roundepi16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
            0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
        );
        assert_eq_m512h(r, e);
    }
24312
24313 #[simd_test(enable = "avx512fp16,avx512vl")]
24314 unsafe fn test_mm_cvtepu16_ph() {
24315 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24316 let r = _mm_cvtepu16_ph(a);
24317 let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
24318 assert_eq_m128h(r, e);
24319 }
24320
24321 #[simd_test(enable = "avx512fp16,avx512vl")]
24322 unsafe fn test_mm_mask_cvtepu16_ph() {
24323 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24324 let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
24325 let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a);
24326 let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
24327 assert_eq_m128h(r, e);
24328 }
24329
24330 #[simd_test(enable = "avx512fp16,avx512vl")]
24331 unsafe fn test_mm_maskz_cvtepu16_ph() {
24332 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
24333 let r = _mm_maskz_cvtepu16_ph(0b01010101, a);
24334 let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
24335 assert_eq_m128h(r, e);
24336 }
24337
    // 256-bit lane-wise unsigned 16-bit integer -> f16 conversion.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtepu16_ph() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_cvtepu16_ph(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }
24347
    // Masked conversion: lanes with a clear mask bit keep the corresponding src lane.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu16_ph() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm256_mask_cvtepu16_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
        );
        assert_eq_m256h(r, e);
    }
24360
    // Zero-masked conversion: lanes with a clear mask bit are zeroed.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu16_ph() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16.,
        );
        assert_eq_m256h(r, e);
    }
24370
    // 512-bit lane-wise unsigned 16-bit integer -> f16 conversion.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtepu16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_cvtepu16_ph(a);
        let e = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }
24385
    // Masked 512-bit conversion: lanes with a clear mask bit keep the corresponding src lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtepu16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let src = _mm512_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
            27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        );
        let r = _mm512_mask_cvtepu16_ph(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
            28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
        );
        assert_eq_m512h(r, e);
    }
24403
    // Zero-masked 512-bit conversion: lanes with a clear mask bit are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtepu16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_cvtepu16_ph(0b01010101010101010101010101010101, a);
        let e = _mm512_set_ph(
            0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
            0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
        );
        assert_eq_m512h(r, e);
    }
24417
    // Unsigned 16-bit -> f16 conversion with an explicit rounding mode; small integers
    // are exactly representable, so rounding has no visible effect here.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundepu16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        assert_eq_m512h(r, e);
    }
24432
    // Masked rounding conversion: lanes with a clear mask bit keep the corresponding src lane.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundepu16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let src = _mm512_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
            27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        );
        let r = _mm512_mask_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16., 26., 18.,
            28., 20., 30., 22., 32., 24., 34., 26., 36., 28., 38., 30., 40., 32.,
        );
        assert_eq_m512h(r, e);
    }
24454
    // Zero-masked rounding conversion: lanes with a clear mask bit are zeroed.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundepu16_ph() {
        let a = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_cvt_roundepu16_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_ph(
            0., 2., 0., 4., 0., 6., 0., 8., 0., 10., 0., 12., 0., 14., 0., 16., 0., 18., 0., 20.,
            0., 22., 0., 24., 0., 26., 0., 28., 0., 30., 0., 32.,
        );
        assert_eq_m512h(r, e);
    }
24471
    // --- signed 32-bit int -> f16 conversions (plain, mask, maskz, and
    // explicit-rounding variants) plus the scalar _mm_cvti32_sh tests.
    // The 128-/256-bit sources convert to the low lanes of a __m128h with the
    // unused upper lanes zeroed; the 512-bit source fills a full __m256h.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtepi32_ph() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ph(a);
        // Only 4 source elements: upper half of the f16 result is zeroed.
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtepi32_ph() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_cvtepi32_ph(src, 0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi32_ph() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_cvtepi32_ph(0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtepi32_ph() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtepi32_ph(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi32_ph() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a);
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi32_ph() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtepi32_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtepi32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtepi32_ph(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    // NOTE(review): the mask/maskz/round variants below gate on
    // "avx512fp16,avx512vl" even though they exercise 512-bit intrinsics,
    // while `test_mm512_cvtepi32_ph` above gates on "avx512fp16" alone.
    // Looks inconsistent — verify against the intrinsics' #[target_feature]
    // declarations before changing either.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtepi32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm512_mask_cvtepi32_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtepi32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvt_roundepi32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm512_mask_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
        );
        let e = _mm256_set_ph(
            10., 2., 12., 4., 14., 6., 16., 8., 18., 10., 20., 12., 22., 14., 24., 16.,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
        );
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    // Scalar i32 -> f16: replaces element 0, copies the rest of `a` through.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvti32_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvti32_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvt_roundi32_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }
24610
    // --- unsigned 32-bit int -> f16 conversions (plain, mask, maskz, round)
    // plus scalar _mm_cvtu32_sh. Mirrors the epi32 tests above: inputs are
    // small positive integers, so signed/unsigned results coincide; the test
    // values distinguish the intrinsic lowering, not value ranges.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtepu32_ph() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_cvtepu32_ph(a);
        // 4 source elements only: upper half of the f16 result is zeroed.
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtepu32_ph() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_cvtepu32_ph(src, 0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2., 16., 4.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu32_ph() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_cvtepu32_ph(0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtepu32_ph() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtepu32_ph(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu32_ph() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a);
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu32_ph() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtepu32_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvtepu32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtepu32_ph(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtepu32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm512_mask_cvtepu32_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtepu32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvt_roundepu32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm512_mask_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
        );
        let e = _mm256_set_ph(
            10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
            16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
        );
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    // Scalar u32 -> f16: replaces element 0, copies the rest of `a` through.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtu32_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtu32_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvt_roundu32_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }
24750
    // --- signed 64-bit int -> f16 conversions. Element counts halve again:
    // a __m128i holds 2 qwords, __m256i 4, __m512i 8, so even the 512-bit
    // source fills only the low half of a __m128h result (8 f16 lanes).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtepi64_ph() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepi64_ph(a);
        // 2 source elements: upper 6 f16 lanes are zeroed.
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtepi64_ph() {
        let a = _mm_set_epi64x(1, 2);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_cvtepi64_ph(src, 0b01, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi64_ph() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepi64_ph(0b01, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtepi64_ph() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepi64_ph(a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi64_ph() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi64_ph() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepi64_ph(0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvtepi64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepi64_ph(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtepi64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a);
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtepi64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvt_roundepi64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    // NOTE(review): this test gates on "avx512fp16" alone while every sibling
    // in this group adds "avx512vl" — likely an inconsistency; confirm against
    // the intrinsic's #[target_feature] declaration before changing.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundepi64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
        assert_eq_m128h(r, e);
    }
24854
    // --- unsigned 64-bit int -> f16 conversions. Mirrors the epi64 group
    // above (2/4/8 source elements into the low lanes of a __m128h, upper
    // lanes zeroed); inputs are small so signed/unsigned results coincide.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtepu64_ph() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepu64_ph(a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtepu64_ph() {
        let a = _mm_set_epi64x(1, 2);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_cvtepu64_ph(src, 0b01, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu64_ph() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepu64_ph(0b01, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtepu64_ph() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepu64_ph(a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu64_ph() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu64_ph() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepu64_ph(0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvtepu64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepu64_ph(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtepu64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a);
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtepu64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvt_roundepu64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
        assert_eq_m128h(r, e);
    }
24958
    // --- f32 -> f16 down-conversions (_mm*_cvtxps_ph and the 512-bit
    // explicit-rounding _mm512_cvtx_roundps_ph variants). All inputs are
    // small integers, exactly representable in f16, so no rounding effects
    // are exercised here — only lane routing and masking.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtxps_ph() {
        let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtxps_ph(a);
        // 4 source elements: upper half of the f16 result is zeroed.
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtxps_ph() {
        let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_cvtxps_ph(src, 0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16., 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtxps_ph() {
        let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvtxps_ph(0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtxps_ph() {
        let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtxps_ph(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtxps_ph() {
        let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a);
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtxps_ph() {
        let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvtxps_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvtxps_ph() {
        let a = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtxps_ph(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtxps_ph() {
        let a = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm512_mask_cvtxps_ph(src, 0b0101010101010101, a);
        let e = _mm256_set_ph(
            10., 2.0, 12., 4.0, 14., 6.0, 16., 8.0, 18., 10.0, 20., 12.0, 22., 14.0, 24., 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtxps_ph() {
        let a = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtxps_ph(0b0101010101010101, a);
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvtx_roundps_ph() {
        let a = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtx_roundps_ph() {
        let a = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm256_set_ph(
            10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        );
        let r = _mm512_mask_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
        );
        let e = _mm256_set_ph(
            10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
            16.0,
        );
        assert_eq_m256h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
        let a = _mm512_set_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtx_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
        );
        let e = _mm256_set_ph(
            0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
        );
        assert_eq_m256h(r, e);
    }
25094
    // --- scalar f32 -> f16 (_mm_cvtss_sh family): converts element 0 of `b`
    // into element 0 of the result and copies elements 1..7 from `a`.
    // For the masked forms, mask bit 0 chooses between the converted value
    // (bit set), `src` element 0 (mask form, bit clear), or zero (maskz form,
    // bit clear). Note these use _mm_setr_* (element 0 listed first).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtss_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtss_sh(a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtss_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
        // Mask clear: element 0 taken from src.
        let r = _mm_mask_cvtss_sh(src, 0, a, b);
        let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        // Mask set: element 0 is the converted value.
        let r = _mm_mask_cvtss_sh(src, 1, a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtss_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        // Mask clear: element 0 zeroed.
        let r = _mm_maskz_cvtss_sh(0, a, b);
        let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        // Mask set: element 0 is the converted value.
        let r = _mm_maskz_cvtss_sh(1, a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvt_roundss_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvt_roundss_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
        let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        let r = _mm_mask_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvt_roundss_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r =
            _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }
25168
    // --- f64 -> f16 down-conversions (_mm*_cvtpd_ph and the 512-bit
    // explicit-rounding variants). As with the 64-bit integer group, element
    // counts are 2/4/8, so every result is a __m128h with unused upper lanes
    // zeroed. Inputs are exactly representable in f16, so only lane routing
    // and masking are exercised.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtpd_ph() {
        let a = _mm_set_pd(1.0, 2.0);
        let r = _mm_cvtpd_ph(a);
        // 2 source elements: upper 6 f16 lanes are zeroed.
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtpd_ph() {
        let a = _mm_set_pd(1.0, 2.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm_mask_cvtpd_ph(src, 0b01, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 16., 2.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtpd_ph() {
        let a = _mm_set_pd(1.0, 2.0);
        let r = _mm_maskz_cvtpd_ph(0b01, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtpd_ph() {
        let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtpd_ph(a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtpd_ph() {
        let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm256_mask_cvtpd_ph(src, 0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 14., 2.0, 16.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtpd_ph() {
        let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtpd_ph(0b0101, a);
        let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvtpd_ph() {
        let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtpd_ph(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvtpd_ph() {
        let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a);
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvtpd_ph() {
        let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
        let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_cvt_roundpd_ph() {
        let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_mask_cvt_roundpd_ph() {
        let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm_set_ph(10., 2., 12., 4., 14., 6., 16., 8.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
        let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
        assert_eq_m128h(r, e);
    }
25272
    // --- scalar f64 -> f16 conversion tests (vcvtsd2sh) ---
    // Only lane 0 of the result is the converted value; the upper seven f16
    // lanes are copied from `a` unchanged, hence the `setr` helpers (which
    // list lane 0 first).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtsd_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_pd(1.0, 2.0);
        let r = _mm_cvtsd_sh(a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    // Masked scalar variant: bit 0 of the mask selects between the converted
    // value (mask = 1) and lane 0 of `src` (mask = 0).
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtsd_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_pd(1.0, 2.0);
        let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
        let r = _mm_mask_cvtsd_sh(src, 0, a, b);
        let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        let r = _mm_mask_cvtsd_sh(src, 1, a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    // Zero-masked scalar variant: mask = 0 zeroes lane 0 instead of blending.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtsd_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_pd(1.0, 2.0);
        let r = _mm_maskz_cvtsd_sh(0, a, b);
        let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        let r = _mm_maskz_cvtsd_sh(1, a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    // Explicit-rounding scalar variants; integer inputs are rounding-mode
    // independent, so the expected values match the non-round tests above.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvt_roundsd_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_pd(1.0, 2.0);
        let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvt_roundsd_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_pd(1.0, 2.0);
        let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
        let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a, b,
        );
        let e = _mm_setr_ph(20., 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        let r = _mm_mask_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 1, a, b,
        );
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvt_roundsd_sh() {
        let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
        let b = _mm_setr_pd(1.0, 2.0);
        let r =
            _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_setr_ph(0.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
        let r =
            _mm_maskz_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(1, a, b);
        let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
        assert_eq_m128h(r, e);
    }
25346
25347 #[simd_test(enable = "avx512fp16,avx512vl")]
25348 unsafe fn test_mm_cvtph_epi16() {
25349 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25350 let r = _mm_cvttph_epi16(a);
25351 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25352 assert_eq_m128i(r, e);
25353 }
25354
25355 #[simd_test(enable = "avx512fp16,avx512vl")]
25356 unsafe fn test_mm_mask_cvtph_epi16() {
25357 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25358 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25359 let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
25360 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25361 assert_eq_m128i(r, e);
25362 }
25363
25364 #[simd_test(enable = "avx512fp16,avx512vl")]
25365 unsafe fn test_mm_maskz_cvtph_epi16() {
25366 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25367 let r = _mm_maskz_cvttph_epi16(0b01010101, a);
25368 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25369 assert_eq_m128i(r, e);
25370 }
25371
25372 #[simd_test(enable = "avx512fp16,avx512vl")]
25373 unsafe fn test_mm256_cvtph_epi16() {
25374 let a = _mm256_set_ph(
25375 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25376 );
25377 let r = _mm256_cvttph_epi16(a);
25378 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25379 assert_eq_m256i(r, e);
25380 }
25381
25382 #[simd_test(enable = "avx512fp16,avx512vl")]
25383 unsafe fn test_mm256_mask_cvtph_epi16() {
25384 let a = _mm256_set_ph(
25385 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25386 );
25387 let src = _mm256_set_epi16(
25388 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25389 );
25390 let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
25391 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25392 assert_eq_m256i(r, e);
25393 }
25394
25395 #[simd_test(enable = "avx512fp16,avx512vl")]
25396 unsafe fn test_mm256_maskz_cvtph_epi16() {
25397 let a = _mm256_set_ph(
25398 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25399 );
25400 let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
25401 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25402 assert_eq_m256i(r, e);
25403 }
25404
25405 #[simd_test(enable = "avx512fp16")]
25406 unsafe fn test_mm512_cvtph_epi16() {
25407 let a = _mm512_set_ph(
25408 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25409 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25410 31.0, 32.0,
25411 );
25412 let r = _mm512_cvttph_epi16(a);
25413 let e = _mm512_set_epi16(
25414 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25415 25, 26, 27, 28, 29, 30, 31, 32,
25416 );
25417 assert_eq_m512i(r, e);
25418 }
25419
25420 #[simd_test(enable = "avx512fp16")]
25421 unsafe fn test_mm512_mask_cvtph_epi16() {
25422 let a = _mm512_set_ph(
25423 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25424 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25425 31.0, 32.0,
25426 );
25427 let src = _mm512_set_epi16(
25428 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25429 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25430 );
25431 let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
25432 let e = _mm512_set_epi16(
25433 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25434 24, 34, 26, 36, 28, 38, 30, 40, 32,
25435 );
25436 assert_eq_m512i(r, e);
25437 }
25438
25439 #[simd_test(enable = "avx512fp16")]
25440 unsafe fn test_mm512_maskz_cvtph_epi16() {
25441 let a = _mm512_set_ph(
25442 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25443 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25444 31.0, 32.0,
25445 );
25446 let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
25447 let e = _mm512_set_epi16(
25448 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25449 0, 28, 0, 30, 0, 32,
25450 );
25451 assert_eq_m512i(r, e);
25452 }
25453
25454 #[simd_test(enable = "avx512fp16")]
25455 unsafe fn test_mm512_cvt_roundph_epi16() {
25456 let a = _mm512_set_ph(
25457 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25458 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25459 31.0, 32.0,
25460 );
25461 let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
25462 let e = _mm512_set_epi16(
25463 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25464 25, 26, 27, 28, 29, 30, 31, 32,
25465 );
25466 assert_eq_m512i(r, e);
25467 }
25468
25469 #[simd_test(enable = "avx512fp16")]
25470 unsafe fn test_mm512_mask_cvt_roundph_epi16() {
25471 let a = _mm512_set_ph(
25472 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25473 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25474 31.0, 32.0,
25475 );
25476 let src = _mm512_set_epi16(
25477 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25478 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25479 );
25480 let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25481 src,
25482 0b01010101010101010101010101010101,
25483 a,
25484 );
25485 let e = _mm512_set_epi16(
25486 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25487 24, 34, 26, 36, 28, 38, 30, 40, 32,
25488 );
25489 assert_eq_m512i(r, e);
25490 }
25491
25492 #[simd_test(enable = "avx512fp16")]
25493 unsafe fn test_mm512_maskz_cvt_roundph_epi16() {
25494 let a = _mm512_set_ph(
25495 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25496 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25497 31.0, 32.0,
25498 );
25499 let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
25500 0b01010101010101010101010101010101,
25501 a,
25502 );
25503 let e = _mm512_set_epi16(
25504 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25505 0, 28, 0, 30, 0, 32,
25506 );
25507 assert_eq_m512i(r, e);
25508 }
25509
25510 #[simd_test(enable = "avx512fp16,avx512vl")]
25511 unsafe fn test_mm_cvtph_epu16() {
25512 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25513 let r = _mm_cvttph_epu16(a);
25514 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
25515 assert_eq_m128i(r, e);
25516 }
25517
25518 #[simd_test(enable = "avx512fp16,avx512vl")]
25519 unsafe fn test_mm_mask_cvtph_epu16() {
25520 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25521 let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
25522 let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
25523 let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
25524 assert_eq_m128i(r, e);
25525 }
25526
25527 #[simd_test(enable = "avx512fp16,avx512vl")]
25528 unsafe fn test_mm_maskz_cvtph_epu16() {
25529 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
25530 let r = _mm_maskz_cvttph_epu16(0b01010101, a);
25531 let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
25532 assert_eq_m128i(r, e);
25533 }
25534
25535 #[simd_test(enable = "avx512fp16,avx512vl")]
25536 unsafe fn test_mm256_cvtph_epu16() {
25537 let a = _mm256_set_ph(
25538 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25539 );
25540 let r = _mm256_cvttph_epu16(a);
25541 let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
25542 assert_eq_m256i(r, e);
25543 }
25544
25545 #[simd_test(enable = "avx512fp16,avx512vl")]
25546 unsafe fn test_mm256_mask_cvtph_epu16() {
25547 let a = _mm256_set_ph(
25548 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25549 );
25550 let src = _mm256_set_epi16(
25551 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
25552 );
25553 let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a);
25554 let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
25555 assert_eq_m256i(r, e);
25556 }
25557
25558 #[simd_test(enable = "avx512fp16,avx512vl")]
25559 unsafe fn test_mm256_maskz_cvtph_epu16() {
25560 let a = _mm256_set_ph(
25561 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25562 );
25563 let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a);
25564 let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
25565 assert_eq_m256i(r, e);
25566 }
25567
25568 #[simd_test(enable = "avx512fp16")]
25569 unsafe fn test_mm512_cvtph_epu16() {
25570 let a = _mm512_set_ph(
25571 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25572 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25573 31.0, 32.0,
25574 );
25575 let r = _mm512_cvttph_epu16(a);
25576 let e = _mm512_set_epi16(
25577 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25578 25, 26, 27, 28, 29, 30, 31, 32,
25579 );
25580 assert_eq_m512i(r, e);
25581 }
25582
25583 #[simd_test(enable = "avx512fp16")]
25584 unsafe fn test_mm512_mask_cvtph_epu16() {
25585 let a = _mm512_set_ph(
25586 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25587 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25588 31.0, 32.0,
25589 );
25590 let src = _mm512_set_epi16(
25591 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
25592 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
25593 );
25594 let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a);
25595 let e = _mm512_set_epi16(
25596 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
25597 24, 34, 26, 36, 28, 38, 30, 40, 32,
25598 );
25599 assert_eq_m512i(r, e);
25600 }
25601
25602 #[simd_test(enable = "avx512fp16")]
25603 unsafe fn test_mm512_maskz_cvtph_epu16() {
25604 let a = _mm512_set_ph(
25605 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
25606 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
25607 31.0, 32.0,
25608 );
25609 let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a);
25610 let e = _mm512_set_epi16(
25611 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
25612 0, 28, 0, 30, 0, 32,
25613 );
25614 assert_eq_m512i(r, e);
25615 }
25616
    // --- f16 -> u16 conversion with explicit rounding mode (vcvtph2uw) ---
    // Integer-valued inputs make the result rounding-mode independent.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m512i(r, e);
    }

    // Masked variant: lanes with a clear mask bit are taken from `src`.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let src = _mm512_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        );
        let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_epi16(
            10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
            24, 34, 26, 36, 28, 38, 30, 40, 32,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: lanes with a clear mask bit become zero.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_epi16(
            0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
            0, 28, 0, 30, 0, 32,
        );
        assert_eq_m512i(r, e);
    }
25672
    // --- f16 -> i16 truncating conversion tests (vcvttph2w) ---
    // Inputs are exact integers, so truncation yields the same values.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epi16() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttph_epi16(a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epi16() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
        // Mask bit i selects lane i; clear bits keep the `src` lane.
        let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
        let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epi16() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        // Clear mask bits zero the corresponding lanes.
        let r = _mm_maskz_cvttph_epi16(0b01010101, a);
        let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epi16() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm256_cvttph_epi16(a);
        let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epi16() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm256_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
        let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epi16() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
        let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epi16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvttph_epi16(a);
        let e = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epi16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let src = _mm512_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        );
        let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_epi16(
            10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
            24, 34, 26, 36, 28, 38, 30, 40, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epi16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
        let e = _mm512_set_epi16(
            0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
            0, 28, 0, 30, 0, 32,
        );
        assert_eq_m512i(r, e);
    }

    // Truncating conversions take only the exception-suppression flag
    // (`_MM_FROUND_NO_EXC`) — the rounding direction is always toward zero.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epi16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epi16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let src = _mm512_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        );
        let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_epi16(
            10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
            24, 34, 26, 36, 28, 38, 30, 40, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epi16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_epi16(
            0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
            0, 28, 0, 30, 0, 32,
        );
        assert_eq_m512i(r, e);
    }
25835
    // --- f16 -> u16 truncating conversion tests (vcvttph2uw) ---
    // Mirrors the signed `cvttph_epi16` tests above with the same vectors.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epu16() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttph_epu16(a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epu16() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
        // Clear mask bits keep the corresponding `src` lane.
        let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
        let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epu16() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        // Clear mask bits zero the corresponding lanes.
        let r = _mm_maskz_cvttph_epu16(0b01010101, a);
        let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epu16() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm256_cvttph_epu16(a);
        let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epu16() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm256_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a);
        let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epu16() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a);
        let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvttph_epu16(a);
        let e = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let src = _mm512_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        );
        let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a);
        let e = _mm512_set_epi16(
            10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
            24, 34, 26, 36, 28, 38, 30, 40, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a);
        let e = _mm512_set_epi16(
            0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
            0, 28, 0, 30, 0, 32,
        );
        assert_eq_m512i(r, e);
    }

    // Truncating conversions accept only `_MM_FROUND_NO_EXC` — direction is
    // always toward zero.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let src = _mm512_set_epi16(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
            32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        );
        let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
            src,
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_epi16(
            10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
            24, 34, 26, 36, 28, 38, 30, 40, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epu16() {
        let a = _mm512_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
            17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
            31.0, 32.0,
        );
        let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
            0b01010101010101010101010101010101,
            a,
        );
        let e = _mm512_set_epi16(
            0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
            0, 28, 0, 30, 0, 32,
        );
        assert_eq_m512i(r, e);
    }
25998
    // --- f16 -> i32 widening conversion tests (vcvtph2dq) ---
    // Only the LOW half of the f16 source vector is consumed: 4 of 8 lanes
    // for the 128-bit result, 8 of 8 for 256-bit, all 16 of an __m256h for
    // the 512-bit result.
    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_epi32() {
        // Upper four f16 lanes are ignored; zero them to make that explicit.
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtph_epi32(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_epi32(10, 11, 12, 13);
        // Clear mask bits keep the corresponding `src` lane.
        let r = _mm_mask_cvtph_epi32(src, 0b0101, a);
        let e = _mm_set_epi32(10, 2, 12, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        // Clear mask bits zero the corresponding lanes.
        let r = _mm_maskz_cvtph_epi32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtph_epi32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvtph_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtph_epi32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    // Explicit-rounding variants; integer inputs are rounding-mode
    // independent.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
        );
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }
26108
26109 #[simd_test(enable = "avx512fp16")]
26110 unsafe fn test_mm512_maskz_cvt_roundph_epi32() {
26111 let a = _mm256_set_ph(
26112 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26113 );
26114 let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
26115 0b0101010101010101,
26116 a,
26117 );
26118 let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
26119 assert_eq_m512i(r, e);
26120 }
26121
26122 #[simd_test(enable = "avx512fp16")]
26123 unsafe fn test_mm_cvtsh_i32() {
26124 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26125 let r = _mm_cvtsh_i32(a);
26126 assert_eq!(r, 1);
26127 }
26128
26129 #[simd_test(enable = "avx512fp16")]
26130 unsafe fn test_mm_cvt_roundsh_i32() {
26131 let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26132 let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
26133 assert_eq!(r, 1);
26134 }
26135
    // ---- f16 -> u32 conversions: packed (vcvtph2udq) and scalar (vcvtsh2usi). ----
    // `_mm*_set_ph` / `_mm*_set_epi32` take arguments from the highest lane
    // down to lane 0; mask bit i gates lane i. Inputs are small non-negative
    // values, so the signed `set_epi32` expectations are valid u32 bit patterns.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_epu32() {
        // Only the low 4 of the 8 f16 lanes feed the 4 u32 result lanes.
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtph_epu32(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_epi32(10, 11, 12, 13);
        let r = _mm_mask_cvtph_epu32(src, 0b0101, a);
        let e = _mm_set_epi32(10, 2, 12, 4); // unselected lanes come from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvtph_epu32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4); // unselected lanes zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvtph_epu32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvtph_epu32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtph_epu32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    // `_round` variants: exact integer-valued inputs make the rounding mode
    // irrelevant to the result; these verify the const argument is accepted.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b0101010101010101,
            a,
        );
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b0101010101010101,
            a,
        );
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_u32() {
        // `setr_ph` lists lanes low-to-high: lane 0 (the converted scalar) is 1.0.
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_u32(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_u32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }
26272
    // ---- Truncating f16 -> i32 conversions: packed (vcvttph2dq) and scalar
    // (vcvttsh2si). Truncation fixes the rounding mode, so the `_round`
    // variants here take only the SAE flag `_MM_FROUND_NO_EXC` (no
    // `_MM_FROUND_TO_*` bits). Lane/mask ordering as elsewhere: `set_*`
    // arguments run highest lane to lane 0, mask bit i gates lane i.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epi32() {
        // Only the low 4 of the 8 f16 lanes feed the 4 i32 result lanes.
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvttph_epi32(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_epi32(10, 11, 12, 13);
        let r = _mm_mask_cvttph_epi32(src, 0b0101, a);
        let e = _mm_set_epi32(10, 2, 12, 4); // unselected lanes come from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epi32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvttph_epi32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4); // unselected lanes zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvttph_epi32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epi32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvttph_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvttph_epi32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epi32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_i32() {
        // `setr_ph` lists lanes low-to-high: lane 0 (the converted scalar) is 1.0.
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_i32(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_i32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }
26402
    // ---- Truncating f16 -> u32 conversions: packed (vcvttph2udq) and scalar
    // (vcvttsh2usi). The `_round` variants take only `_MM_FROUND_NO_EXC`
    // since truncation fixes the rounding direction. `set_*` arguments run
    // highest lane to lane 0; mask bit i gates lane i.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epu32() {
        // Only the low 4 of the 8 f16 lanes feed the 4 u32 result lanes.
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvttph_epu32(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let src = _mm_set_epi32(10, 11, 12, 13);
        let r = _mm_mask_cvttph_epu32(src, 0b0101, a);
        let e = _mm_set_epi32(10, 2, 12, 4); // unselected lanes come from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epu32() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm_maskz_cvttph_epu32(0b0101, a);
        let e = _mm_set_epi32(0, 2, 0, 4); // unselected lanes zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_cvttph_epu32(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
        let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a);
        let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epu32() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm256_maskz_cvttph_epu32(0b01010101, a);
        let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvttph_epu32(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let src = _mm512_set_epi32(
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        );
        let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
        let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epu32() {
        let a = _mm256_set_ph(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
        let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_u32() {
        // `setr_ph` lists lanes low-to-high: lane 0 (the converted scalar) is 1.0.
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_u32(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_u32() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }
26532
    // ---- f16 -> i64 packed conversions (vcvtph2qq). ----
    // i64 lanes are 4x wider than f16 lanes, so only the low 2/4/8 f16
    // elements of the __m128h input are consumed for the 128/256/512-bit
    // results; the unused high f16 lanes are padded with 0.0 in the fixtures.
    // `set_*` arguments run highest lane to lane 0; mask bit i gates lane i.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvtph_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_epi64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvtph_epi64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2); // lane 0 converted, lane 1 from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvtph_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2); // lane 0 converted, lane 1 zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtph_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epi64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvtph_epi64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtph_epi64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epi64() {
        // All 8 f16 lanes of the __m128h map onto the 8 i64 lanes.
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtph_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtph_epi64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    // `_round` variants: exact integer-valued inputs make the rounding mode
    // irrelevant to the result; these verify the const argument is accepted.
    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }
26636
    // ---- f16 -> u64 packed conversions (vcvtph2uqq). ----
    // Mirrors the epi64 tests: only the low 2/4/8 f16 elements of the
    // __m128h input are consumed for the 128/256/512-bit results. Inputs are
    // small non-negative values, so signed `set_epi64x`/`set_epi64`
    // expectations are valid u64 bit patterns.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvtph_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvtph_epu64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvtph_epu64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2); // lane 0 converted, lane 1 from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvtph_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2); // lane 0 converted, lane 1 zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvtph_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvtph_epu64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvtph_epu64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvtph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvtph_epu64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtph_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtph_epu64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvt_roundph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b01010101, a,
        );
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01010101, a,
        );
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }
26740
    // ---- Truncating f16 -> i64 packed conversions (vcvttph2qq). ----
    // Only the low 2/4/8 f16 elements of the __m128h input are consumed for
    // the 128/256/512-bit results. The `_round` variants take only
    // `_MM_FROUND_NO_EXC` since truncation fixes the rounding direction.

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvttph_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epi64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvttph_epi64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2); // lane 0 converted, lane 1 from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvttph_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2); // lane 0 converted, lane 1 zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvttph_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epi64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvttph_epi64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epi64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvttph_epi64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvttph_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvttph_epi64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epi64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvtt_roundph_epi64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }
26840
    // ---- Truncating f16 -> u64 packed conversions (vcvttph2uqq). ----
    // Only the low 2/4/8 f16 elements of the __m128h input are consumed for
    // the 128/256/512-bit results; small non-negative inputs make the signed
    // `set_epi64x`/`set_epi64` expectations valid u64 bit patterns. The
    // `_round` variants take only `_MM_FROUND_NO_EXC` (truncation fixes the
    // rounding direction).

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_cvttph_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_mask_cvttph_epu64() {
        let src = _mm_set_epi64x(3, 4);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_mask_cvttph_epu64(src, 0b01, a);
        let e = _mm_set_epi64x(3, 2); // lane 0 converted, lane 1 from src
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm_maskz_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
        let r = _mm_maskz_cvttph_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2); // lane 0 converted, lane 1 zeroed
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_cvttph_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_mask_cvttph_epu64() {
        let src = _mm256_set_epi64x(5, 6, 7, 8);
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_mask_cvttph_epu64(src, 0b0101, a);
        let e = _mm256_set_epi64x(5, 2, 7, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16,avx512vl")]
    unsafe fn test_mm256_maskz_cvttph_epu64() {
        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
        let r = _mm256_maskz_cvttph_epu64(0b0101, a);
        let e = _mm256_set_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvttph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvttph_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvttph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_maskz_cvttph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_maskz_cvttph_epu64(0b01010101, a);
        let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_cvtt_roundph_epu64() {
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm512_mask_cvtt_roundph_epu64() {
        let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
        let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
        assert_eq_m512i(r, e);
    }
26932
26933 #[simd_test(enable = "avx512fp16")]
26934 unsafe fn test_mm512_maskz_cvtt_roundph_epu64() {
26935 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26936 let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a);
26937 let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
26938 assert_eq_m512i(r, e);
26939 }
26940
26941 #[simd_test(enable = "avx512fp16,avx512vl")]
26942 unsafe fn test_mm_cvtxph_ps() {
26943 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26944 let r = _mm_cvtxph_ps(a);
26945 let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
26946 assert_eq_m128(r, e);
26947 }
26948
26949 #[simd_test(enable = "avx512fp16,avx512vl")]
26950 unsafe fn test_mm_mask_cvtxph_ps() {
26951 let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0);
26952 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26953 let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
26954 let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0);
26955 assert_eq_m128(r, e);
26956 }
26957
26958 #[simd_test(enable = "avx512fp16,avx512vl")]
26959 unsafe fn test_mm_maskz_cvtxph_ps() {
26960 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
26961 let r = _mm_maskz_cvtxph_ps(0b0101, a);
26962 let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0);
26963 assert_eq_m128(r, e);
26964 }
26965
26966 #[simd_test(enable = "avx512fp16,avx512vl")]
26967 unsafe fn test_mm256_cvtxph_ps() {
26968 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26969 let r = _mm256_cvtxph_ps(a);
26970 let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26971 assert_eq_m256(r, e);
26972 }
26973
26974 #[simd_test(enable = "avx512fp16,avx512vl")]
26975 unsafe fn test_mm256_mask_cvtxph_ps() {
26976 let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
26977 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26978 let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a);
26979 let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
26980 assert_eq_m256(r, e);
26981 }
26982
26983 #[simd_test(enable = "avx512fp16,avx512vl")]
26984 unsafe fn test_mm256_maskz_cvtxph_ps() {
26985 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
26986 let r = _mm256_maskz_cvtxph_ps(0b01010101, a);
26987 let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
26988 assert_eq_m256(r, e);
26989 }
26990
26991 #[simd_test(enable = "avx512fp16")]
26992 unsafe fn test_mm512_cvtxph_ps() {
26993 let a = _mm256_set_ph(
26994 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26995 );
26996 let r = _mm512_cvtxph_ps(a);
26997 let e = _mm512_set_ps(
26998 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
26999 );
27000 assert_eq_m512(r, e);
27001 }
27002
27003 #[simd_test(enable = "avx512fp16")]
27004 unsafe fn test_mm512_mask_cvtxph_ps() {
27005 let src = _mm512_set_ps(
27006 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
27007 24.0, 25.0,
27008 );
27009 let a = _mm256_set_ph(
27010 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27011 );
27012 let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a);
27013 let e = _mm512_set_ps(
27014 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
27015 16.0,
27016 );
27017 assert_eq_m512(r, e);
27018 }
27019
27020 #[simd_test(enable = "avx512fp16")]
27021 unsafe fn test_mm512_maskz_cvtxph_ps() {
27022 let a = _mm256_set_ph(
27023 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27024 );
27025 let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a);
27026 let e = _mm512_set_ps(
27027 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
27028 );
27029 assert_eq_m512(r, e);
27030 }
27031
27032 #[simd_test(enable = "avx512fp16")]
27033 unsafe fn test_mm512_cvtx_roundph_ps() {
27034 let a = _mm256_set_ph(
27035 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27036 );
27037 let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
27038 let e = _mm512_set_ps(
27039 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27040 );
27041 assert_eq_m512(r, e);
27042 }
27043
27044 #[simd_test(enable = "avx512fp16")]
27045 unsafe fn test_mm512_mask_cvtx_roundph_ps() {
27046 let src = _mm512_set_ps(
27047 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
27048 24.0, 25.0,
27049 );
27050 let a = _mm256_set_ph(
27051 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27052 );
27053 let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
27054 let e = _mm512_set_ps(
27055 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
27056 16.0,
27057 );
27058 assert_eq_m512(r, e);
27059 }
27060
27061 #[simd_test(enable = "avx512fp16")]
27062 unsafe fn test_mm512_maskz_cvtx_roundph_ps() {
27063 let a = _mm256_set_ph(
27064 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27065 );
27066 let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
27067 let e = _mm512_set_ps(
27068 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
27069 );
27070 assert_eq_m512(r, e);
27071 }
27072
27073 #[simd_test(enable = "avx512fp16")]
27074 unsafe fn test_mm_cvtsh_ss() {
27075 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27076 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27077 let r = _mm_cvtsh_ss(a, b);
27078 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27079 assert_eq_m128(r, e);
27080 }
27081
27082 #[simd_test(enable = "avx512fp16")]
27083 unsafe fn test_mm_mask_cvtsh_ss() {
27084 let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
27085 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27086 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27087 let r = _mm_mask_cvtsh_ss(src, 0, a, b);
27088 let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
27089 assert_eq_m128(r, e);
27090 let r = _mm_mask_cvtsh_ss(src, 1, a, b);
27091 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27092 assert_eq_m128(r, e);
27093 }
27094
27095 #[simd_test(enable = "avx512fp16")]
27096 unsafe fn test_mm_maskz_cvtsh_ss() {
27097 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27098 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27099 let r = _mm_maskz_cvtsh_ss(0, a, b);
27100 let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
27101 assert_eq_m128(r, e);
27102 let r = _mm_maskz_cvtsh_ss(1, a, b);
27103 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27104 assert_eq_m128(r, e);
27105 }
27106
27107 #[simd_test(enable = "avx512fp16")]
27108 unsafe fn test_mm_cvt_roundsh_ss() {
27109 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27110 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27111 let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
27112 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27113 assert_eq_m128(r, e);
27114 }
27115
27116 #[simd_test(enable = "avx512fp16")]
27117 unsafe fn test_mm_mask_cvt_roundsh_ss() {
27118 let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
27119 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27120 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27121 let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b);
27122 let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
27123 assert_eq_m128(r, e);
27124 let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b);
27125 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27126 assert_eq_m128(r, e);
27127 }
27128
27129 #[simd_test(enable = "avx512fp16")]
27130 unsafe fn test_mm_maskz_cvt_roundsh_ss() {
27131 let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
27132 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27133 let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b);
27134 let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
27135 assert_eq_m128(r, e);
27136 let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b);
27137 let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
27138 assert_eq_m128(r, e);
27139 }
27140
27141 #[simd_test(enable = "avx512fp16,avx512vl")]
27142 unsafe fn test_mm_cvtph_pd() {
27143 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27144 let r = _mm_cvtph_pd(a);
27145 let e = _mm_set_pd(1.0, 2.0);
27146 assert_eq_m128d(r, e);
27147 }
27148
27149 #[simd_test(enable = "avx512fp16,avx512vl")]
27150 unsafe fn test_mm_mask_cvtph_pd() {
27151 let src = _mm_set_pd(10.0, 11.0);
27152 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27153 let r = _mm_mask_cvtph_pd(src, 0b01, a);
27154 let e = _mm_set_pd(10.0, 2.0);
27155 assert_eq_m128d(r, e);
27156 }
27157
27158 #[simd_test(enable = "avx512fp16,avx512vl")]
27159 unsafe fn test_mm_maskz_cvtph_pd() {
27160 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
27161 let r = _mm_maskz_cvtph_pd(0b01, a);
27162 let e = _mm_set_pd(0.0, 2.0);
27163 assert_eq_m128d(r, e);
27164 }
27165
27166 #[simd_test(enable = "avx512fp16,avx512vl")]
27167 unsafe fn test_mm256_cvtph_pd() {
27168 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27169 let r = _mm256_cvtph_pd(a);
27170 let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
27171 assert_eq_m256d(r, e);
27172 }
27173
27174 #[simd_test(enable = "avx512fp16,avx512vl")]
27175 unsafe fn test_mm256_mask_cvtph_pd() {
27176 let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0);
27177 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27178 let r = _mm256_mask_cvtph_pd(src, 0b0101, a);
27179 let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0);
27180 assert_eq_m256d(r, e);
27181 }
27182
27183 #[simd_test(enable = "avx512fp16,avx512vl")]
27184 unsafe fn test_mm256_maskz_cvtph_pd() {
27185 let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
27186 let r = _mm256_maskz_cvtph_pd(0b0101, a);
27187 let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0);
27188 assert_eq_m256d(r, e);
27189 }
27190
27191 #[simd_test(enable = "avx512fp16")]
27192 unsafe fn test_mm512_cvtph_pd() {
27193 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27194 let r = _mm512_cvtph_pd(a);
27195 let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27196 assert_eq_m512d(r, e);
27197 }
27198
27199 #[simd_test(enable = "avx512fp16")]
27200 unsafe fn test_mm512_mask_cvtph_pd() {
27201 let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
27202 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27203 let r = _mm512_mask_cvtph_pd(src, 0b01010101, a);
27204 let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
27205 assert_eq_m512d(r, e);
27206 }
27207
27208 #[simd_test(enable = "avx512fp16")]
27209 unsafe fn test_mm512_maskz_cvtph_pd() {
27210 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27211 let r = _mm512_maskz_cvtph_pd(0b01010101, a);
27212 let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
27213 assert_eq_m512d(r, e);
27214 }
27215
27216 #[simd_test(enable = "avx512fp16")]
27217 unsafe fn test_mm512_cvt_roundph_pd() {
27218 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27219 let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
27220 let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27221 assert_eq_m512d(r, e);
27222 }
27223
27224 #[simd_test(enable = "avx512fp16")]
27225 unsafe fn test_mm512_mask_cvt_roundph_pd() {
27226 let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
27227 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27228 let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
27229 let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
27230 assert_eq_m512d(r, e);
27231 }
27232
27233 #[simd_test(enable = "avx512fp16")]
27234 unsafe fn test_mm512_maskz_cvt_roundph_pd() {
27235 let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
27236 let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a);
27237 let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
27238 assert_eq_m512d(r, e);
27239 }
27240
27241 #[simd_test(enable = "avx512fp16")]
27242 unsafe fn test_mm_cvtsh_sd() {
27243 let a = _mm_setr_pd(2.0, 20.0);
27244 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27245 let r = _mm_cvtsh_sd(a, b);
27246 let e = _mm_setr_pd(1.0, 20.0);
27247 assert_eq_m128d(r, e);
27248 }
27249
27250 #[simd_test(enable = "avx512fp16")]
27251 unsafe fn test_mm_mask_cvtsh_sd() {
27252 let src = _mm_setr_pd(3.0, 11.0);
27253 let a = _mm_setr_pd(2.0, 20.0);
27254 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27255 let r = _mm_mask_cvtsh_sd(src, 0, a, b);
27256 let e = _mm_setr_pd(3.0, 20.0);
27257 assert_eq_m128d(r, e);
27258 let r = _mm_mask_cvtsh_sd(src, 1, a, b);
27259 let e = _mm_setr_pd(1.0, 20.0);
27260 assert_eq_m128d(r, e);
27261 }
27262
27263 #[simd_test(enable = "avx512fp16")]
27264 unsafe fn test_mm_maskz_cvtsh_sd() {
27265 let a = _mm_setr_pd(2.0, 20.0);
27266 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27267 let r = _mm_maskz_cvtsh_sd(0, a, b);
27268 let e = _mm_setr_pd(0.0, 20.0);
27269 assert_eq_m128d(r, e);
27270 let r = _mm_maskz_cvtsh_sd(1, a, b);
27271 let e = _mm_setr_pd(1.0, 20.0);
27272 assert_eq_m128d(r, e);
27273 }
27274
27275 #[simd_test(enable = "avx512fp16")]
27276 unsafe fn test_mm_cvt_roundsh_sd() {
27277 let a = _mm_setr_pd(2.0, 20.0);
27278 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27279 let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b);
27280 let e = _mm_setr_pd(1.0, 20.0);
27281 assert_eq_m128d(r, e);
27282 }
27283
27284 #[simd_test(enable = "avx512fp16")]
27285 unsafe fn test_mm_mask_cvt_roundsh_sd() {
27286 let src = _mm_setr_pd(3.0, 11.0);
27287 let a = _mm_setr_pd(2.0, 20.0);
27288 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27289 let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b);
27290 let e = _mm_setr_pd(3.0, 20.0);
27291 assert_eq_m128d(r, e);
27292 let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b);
27293 let e = _mm_setr_pd(1.0, 20.0);
27294 assert_eq_m128d(r, e);
27295 }
27296
27297 #[simd_test(enable = "avx512fp16")]
27298 unsafe fn test_mm_maskz_cvt_roundsh_sd() {
27299 let a = _mm_setr_pd(2.0, 20.0);
27300 let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
27301 let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b);
27302 let e = _mm_setr_pd(0.0, 20.0);
27303 assert_eq_m128d(r, e);
27304 let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b);
27305 let e = _mm_setr_pd(1.0, 20.0);
27306 assert_eq_m128d(r, e);
27307 }
27308
27309 #[simd_test(enable = "avx512fp16")]
27310 unsafe fn test_mm_cvtsh_h() {
27311 let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0);
27312 let r = _mm_cvtsh_h(a);
27313 assert_eq!(r, 1.0);
27314 }
27315
27316 #[simd_test(enable = "avx512fp16")]
27317 unsafe fn test_mm256_cvtsh_h() {
27318 let a = _mm256_setr_ph(
27319 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27320 );
27321 let r = _mm256_cvtsh_h(a);
27322 assert_eq!(r, 1.0);
27323 }
27324
27325 #[simd_test(enable = "avx512fp16")]
27326 unsafe fn test_mm512_cvtsh_h() {
27327 let a = _mm512_setr_ph(
27328 1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
27329 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
27330 31.0, 32.0,
27331 );
27332 let r = _mm512_cvtsh_h(a);
27333 assert_eq!(r, 1.0);
27334 }
27335
27336 #[simd_test(enable = "avx512fp16")]
27337 unsafe fn test_mm_cvtsi128_si16() {
27338 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
27339 let r = _mm_cvtsi128_si16(a);
27340 assert_eq!(r, 1);
27341 }
27342
27343 #[simd_test(enable = "avx512fp16")]
27344 unsafe fn test_mm_cvtsi16_si128() {
27345 let a = 1;
27346 let r = _mm_cvtsi16_si128(a);
27347 let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
27348 assert_eq_m128i(r, e);
27349 }
27350}
27351